net/mlx5: normalize function prototypes
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox.
4  */
5
6 #include <sys/queue.h>
7 #include <string.h>
8
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18
19 #include <rte_ethdev_driver.h>
20 #include <rte_flow.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
23 #include <rte_ip.h>
24
25 #include "mlx5.h"
26 #include "mlx5_defs.h"
27 #include "mlx5_prm.h"
28 #include "mlx5_glue.h"
29
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
32
33 /* Internet Protocol versions. */
34 #define MLX5_IPV4 4
35 #define MLX5_IPV6 6
36
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39         int dummy;
40 };
41 #endif
42
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
46
47 static int
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49                      const void *default_mask,
50                      void *data);
51
52 static int
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54                       const void *default_mask,
55                       void *data);
56
57 static int
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59                       const void *default_mask,
60                       void *data);
61
62 static int
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64                       const void *default_mask,
65                       void *data);
66
67 static int
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69                      const void *default_mask,
70                      void *data);
71
72 static int
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74                      const void *default_mask,
75                      void *data);
76
77 static int
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79                        const void *default_mask,
80                        void *data);
81
82 struct mlx5_flow_parse;
83
84 static void
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
86                       unsigned int size);
87
88 static int
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
90
91 static int
92 mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
93
94 /* Hash RX queue types. */
95 enum hash_rxq_type {
96         HASH_RXQ_TCPV4,
97         HASH_RXQ_UDPV4,
98         HASH_RXQ_IPV4,
99         HASH_RXQ_TCPV6,
100         HASH_RXQ_UDPV6,
101         HASH_RXQ_IPV6,
102         HASH_RXQ_ETH,
103 };
104
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107         uint64_t hash_fields; /* Fields that participate in the hash. */
108         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109         unsigned int flow_priority; /* Flow priority to use. */
110         unsigned int ip_version; /* Internet protocol. */
111 };
112
113 /* Initialization data for hash RX queues. */
114 const struct hash_rxq_init hash_rxq_init[] = {
115         [HASH_RXQ_TCPV4] = {
116                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117                                 IBV_RX_HASH_DST_IPV4 |
118                                 IBV_RX_HASH_SRC_PORT_TCP |
119                                 IBV_RX_HASH_DST_PORT_TCP),
120                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
121                 .flow_priority = 0,
122                 .ip_version = MLX5_IPV4,
123         },
124         [HASH_RXQ_UDPV4] = {
125                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126                                 IBV_RX_HASH_DST_IPV4 |
127                                 IBV_RX_HASH_SRC_PORT_UDP |
128                                 IBV_RX_HASH_DST_PORT_UDP),
129                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
130                 .flow_priority = 0,
131                 .ip_version = MLX5_IPV4,
132         },
133         [HASH_RXQ_IPV4] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135                                 IBV_RX_HASH_DST_IPV4),
136                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
137                                 ETH_RSS_FRAG_IPV4),
138                 .flow_priority = 1,
139                 .ip_version = MLX5_IPV4,
140         },
141         [HASH_RXQ_TCPV6] = {
142                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143                                 IBV_RX_HASH_DST_IPV6 |
144                                 IBV_RX_HASH_SRC_PORT_TCP |
145                                 IBV_RX_HASH_DST_PORT_TCP),
146                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
147                 .flow_priority = 0,
148                 .ip_version = MLX5_IPV6,
149         },
150         [HASH_RXQ_UDPV6] = {
151                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152                                 IBV_RX_HASH_DST_IPV6 |
153                                 IBV_RX_HASH_SRC_PORT_UDP |
154                                 IBV_RX_HASH_DST_PORT_UDP),
155                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
156                 .flow_priority = 0,
157                 .ip_version = MLX5_IPV6,
158         },
159         [HASH_RXQ_IPV6] = {
160                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161                                 IBV_RX_HASH_DST_IPV6),
162                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
163                                 ETH_RSS_FRAG_IPV6),
164                 .flow_priority = 1,
165                 .ip_version = MLX5_IPV6,
166         },
167         [HASH_RXQ_ETH] = {
168                 .hash_fields = 0,
169                 .dpdk_rss_hf = 0,
170                 .flow_priority = 2,
171         },
172 };
173
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
176
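/*
 * Illustrative sketch (not part of the driver logic): dpdk_rss_hf ties a
 * DPDK RSS configuration to the hash Rx queue types a flow is expanded to.
 * With ETH_RSS_NONFRAG_IPV4_UDP enabled, the loop below keeps exactly two
 * candidates, HASH_RXQ_UDPV4 and HASH_RXQ_ETH (the latter is always kept):
 *
 *	uint64_t rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
 *	unsigned int i, candidates = 0;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if ((hash_rxq_init[i].dpdk_rss_hf & rss_hf) ||
 *		    (i == HASH_RXQ_ETH))
 *			++candidates;
 */
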
177 /** Structure for holding counter stats. */
178 struct mlx5_flow_counter_stats {
179         uint64_t hits; /**< Number of packets matched by the rule. */
180         uint64_t bytes; /**< Number of bytes matched by the rule. */
181 };
182
183 /** Structure for Drop queue. */
184 struct mlx5_hrxq_drop {
185         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186         struct ibv_qp *qp; /**< Verbs queue pair. */
187         struct ibv_wq *wq; /**< Verbs work queue. */
188         struct ibv_cq *cq; /**< Verbs completion queue. */
189 };
190
191 /* Flow structures. */
192 struct mlx5_flow {
193         uint64_t hash_fields; /**< Fields that participate in the hash. */
194         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195         struct ibv_flow *ibv_flow; /**< Verbs flow. */
196         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
197 };
198
199 /* Drop flow structures. */
200 struct mlx5_flow_drop {
201         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202         struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 };
204
205 struct rte_flow {
206         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207         uint32_t mark:1; /**< Set if the flow is marked. */
208         uint32_t drop:1; /**< Drop queue. */
209         uint16_t queues_n; /**< Number of entries in queues[]. */
210         uint16_t (*queues)[]; /**< Queue indexes to use. */
211         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
212         uint8_t rss_key[40]; /**< Copy of the RSS key. */
213         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214         struct mlx5_flow_counter_stats counter_stats; /**< Counter stats. */
215         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216         /**< Flow with Rx queue. */
217 };
218
219 /** Static initializer for items. */
220 #define ITEMS(...) \
221         (const enum rte_flow_item_type []){ \
222                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
223         }
224
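/*
 * For example, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * yields a compound-literal array terminated by RTE_FLOW_ITEM_TYPE_END; it
 * is only used below to fill the .items fields of mlx5_flow_items[].
 */
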
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227         /** List of possible actions for these items. */
228         const enum rte_flow_action_type *const actions;
229         /** Bit-masks corresponding to the possibilities for the item. */
230         const void *mask;
231         /**
232          * Default bit-masks to use when item->mask is not provided. When
233          * \default_mask is also NULL, the full supported bit-mask (\mask) is
234          * used instead.
235          */
236         const void *default_mask;
237         /** Bit-masks size in bytes. */
238         const unsigned int mask_sz;
239         /**
240          * Conversion function from rte_flow to NIC specific flow.
241          *
242          * @param item
243          *   rte_flow item to convert.
244          * @param default_mask
245          *   Default bit-masks to use when item->mask is not provided.
246          * @param data
247          *   Internal structure to store the conversion.
248          *
249          * @return
250          *   0 on success, negative value otherwise.
251          */
252         int (*convert)(const struct rte_flow_item *item,
253                        const void *default_mask,
254                        void *data);
255         /** Size in bytes of the destination structure. */
256         const unsigned int dst_sz;
257         /** List of possible following items.  */
258         const enum rte_flow_item_type *const items;
259 };
260
261 /** Valid actions for this PMD. */
262 static const enum rte_flow_action_type valid_actions[] = {
263         RTE_FLOW_ACTION_TYPE_DROP,
264         RTE_FLOW_ACTION_TYPE_QUEUE,
265         RTE_FLOW_ACTION_TYPE_MARK,
266         RTE_FLOW_ACTION_TYPE_FLAG,
267 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
268         RTE_FLOW_ACTION_TYPE_COUNT,
269 #endif
270         RTE_FLOW_ACTION_TYPE_END,
271 };
272
273 /** Graph of supported items and associated actions. */
274 static const struct mlx5_flow_items mlx5_flow_items[] = {
275         [RTE_FLOW_ITEM_TYPE_END] = {
276                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
277                                RTE_FLOW_ITEM_TYPE_VXLAN),
278         },
279         [RTE_FLOW_ITEM_TYPE_ETH] = {
280                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
281                                RTE_FLOW_ITEM_TYPE_IPV4,
282                                RTE_FLOW_ITEM_TYPE_IPV6),
283                 .actions = valid_actions,
284                 .mask = &(const struct rte_flow_item_eth){
285                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
286                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287                         .type = -1,
288                 },
289                 .default_mask = &rte_flow_item_eth_mask,
290                 .mask_sz = sizeof(struct rte_flow_item_eth),
291                 .convert = mlx5_flow_create_eth,
292                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
293         },
294         [RTE_FLOW_ITEM_TYPE_VLAN] = {
295                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
296                                RTE_FLOW_ITEM_TYPE_IPV6),
297                 .actions = valid_actions,
298                 .mask = &(const struct rte_flow_item_vlan){
299                         .tci = -1,
300                 },
301                 .default_mask = &rte_flow_item_vlan_mask,
302                 .mask_sz = sizeof(struct rte_flow_item_vlan),
303                 .convert = mlx5_flow_create_vlan,
304                 .dst_sz = 0,
305         },
306         [RTE_FLOW_ITEM_TYPE_IPV4] = {
307                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
308                                RTE_FLOW_ITEM_TYPE_TCP),
309                 .actions = valid_actions,
310                 .mask = &(const struct rte_flow_item_ipv4){
311                         .hdr = {
312                                 .src_addr = -1,
313                                 .dst_addr = -1,
314                                 .type_of_service = -1,
315                                 .next_proto_id = -1,
316                         },
317                 },
318                 .default_mask = &rte_flow_item_ipv4_mask,
319                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
320                 .convert = mlx5_flow_create_ipv4,
321                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
322         },
323         [RTE_FLOW_ITEM_TYPE_IPV6] = {
324                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
325                                RTE_FLOW_ITEM_TYPE_TCP),
326                 .actions = valid_actions,
327                 .mask = &(const struct rte_flow_item_ipv6){
328                         .hdr = {
329                                 .src_addr = {
330                                         0xff, 0xff, 0xff, 0xff,
331                                         0xff, 0xff, 0xff, 0xff,
332                                         0xff, 0xff, 0xff, 0xff,
333                                         0xff, 0xff, 0xff, 0xff,
334                                 },
335                                 .dst_addr = {
336                                         0xff, 0xff, 0xff, 0xff,
337                                         0xff, 0xff, 0xff, 0xff,
338                                         0xff, 0xff, 0xff, 0xff,
339                                         0xff, 0xff, 0xff, 0xff,
340                                 },
341                                 .vtc_flow = -1,
342                                 .proto = -1,
343                                 .hop_limits = -1,
344                         },
345                 },
346                 .default_mask = &rte_flow_item_ipv6_mask,
347                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
348                 .convert = mlx5_flow_create_ipv6,
349                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
350         },
351         [RTE_FLOW_ITEM_TYPE_UDP] = {
352                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
353                 .actions = valid_actions,
354                 .mask = &(const struct rte_flow_item_udp){
355                         .hdr = {
356                                 .src_port = -1,
357                                 .dst_port = -1,
358                         },
359                 },
360                 .default_mask = &rte_flow_item_udp_mask,
361                 .mask_sz = sizeof(struct rte_flow_item_udp),
362                 .convert = mlx5_flow_create_udp,
363                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
364         },
365         [RTE_FLOW_ITEM_TYPE_TCP] = {
366                 .actions = valid_actions,
367                 .mask = &(const struct rte_flow_item_tcp){
368                         .hdr = {
369                                 .src_port = -1,
370                                 .dst_port = -1,
371                         },
372                 },
373                 .default_mask = &rte_flow_item_tcp_mask,
374                 .mask_sz = sizeof(struct rte_flow_item_tcp),
375                 .convert = mlx5_flow_create_tcp,
376                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
377         },
378         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
379                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
380                 .actions = valid_actions,
381                 .mask = &(const struct rte_flow_item_vxlan){
382                         .vni = "\xff\xff\xff",
383                 },
384                 .default_mask = &rte_flow_item_vxlan_mask,
385                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
386                 .convert = mlx5_flow_create_vxlan,
387                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
388         },
389 };
390
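/*
 * Usage sketch (application-side pattern, assumed values): the graph above
 * accepts item chains such as eth / ipv4 / udp / vxlan / eth / ipv4 / tcp,
 * which an application could express as:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Items given without a spec simply match any value at that layer.
 */
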
391 /** Structure to pass to the conversion function. */
392 struct mlx5_flow_parse {
393         uint32_t inner; /**< Set once VXLAN is encountered. */
394         uint32_t create:1;
395         /**< Whether resources should remain after a validate. */
396         uint32_t drop:1; /**< Target is a drop queue. */
397         uint32_t mark:1; /**< Mark is present in the flow. */
398         uint32_t count:1; /**< Count is present in the flow. */
399         uint32_t mark_id; /**< Mark identifier. */
400         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
401         uint16_t queues_n; /**< Number of entries in queues[]. */
402         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
403         uint8_t rss_key[40]; /**< Copy of the RSS key. */
404         enum hash_rxq_type layer; /**< Last pattern layer detected. */
405         struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
406         struct {
407                 struct ibv_flow_attr *ibv_attr;
408                 /**< Pointer to Verbs attributes. */
409                 unsigned int offset;
410                 /**< Current position or total size of the attribute. */
411         } queue[RTE_DIM(hash_rxq_init)];
412 };
413
414 static const struct rte_flow_ops mlx5_flow_ops = {
415         .validate = mlx5_flow_validate,
416         .create = mlx5_flow_create,
417         .destroy = mlx5_flow_destroy,
418         .flush = mlx5_flow_flush,
419 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
420         .query = mlx5_flow_query,
421 #else
422         .query = NULL,
423 #endif
424         .isolate = mlx5_flow_isolate,
425 };
426
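/*
 * Hedged usage sketch (application side, assumed port_id of an mlx5 device):
 * the callbacks above are reached through the generic rte_flow API, e.g. to
 * steer all IPv4 traffic to Rx queue 3:
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &err);
 */
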
427 /* Convert FDIR request to Generic flow. */
428 struct mlx5_fdir {
429         struct rte_flow_attr attr;
430         struct rte_flow_action actions[2];
431         struct rte_flow_item items[4];
432         struct rte_flow_item_eth l2;
433         struct rte_flow_item_eth l2_mask;
434         union {
435                 struct rte_flow_item_ipv4 ipv4;
436                 struct rte_flow_item_ipv6 ipv6;
437         } l3;
438         union {
439                 struct rte_flow_item_udp udp;
440                 struct rte_flow_item_tcp tcp;
441         } l4;
442         struct rte_flow_action_queue queue;
443 };
444
445 /* Verbs specification header. */
446 struct ibv_spec_header {
447         enum ibv_flow_spec_type type;
448         uint16_t size;
449 };
450
451 /**
452  * Check support for a given item.
453  *
454  * @param[in] item
455  *   Item specification.
456  * @param[in] mask
457  *   Bit-masks covering supported fields to compare with spec, last and mask in
458  *   \item.
459  * @param size
460  *   Bit-mask size in bytes.
461  *
462  * @return
463  *   0 on success.
464  */
465 static int
466 mlx5_flow_item_validate(const struct rte_flow_item *item,
467                         const uint8_t *mask, unsigned int size)
468 {
469         int ret = 0;
470
471         if (!item->spec && (item->mask || item->last))
472                 return -1;
473         if (item->spec && !item->mask) {
474                 unsigned int i;
475                 const uint8_t *spec = item->spec;
476
477                 for (i = 0; i < size; ++i)
478                         if ((spec[i] | mask[i]) != mask[i])
479                                 return -1;
480         }
481         if (item->last && !item->mask) {
482                 unsigned int i;
483                 const uint8_t *spec = item->last;
484
485                 for (i = 0; i < size; ++i)
486                         if ((spec[i] | mask[i]) != mask[i])
487                                 return -1;
488         }
489         if (item->mask) {
490                 unsigned int i;
491                 const uint8_t *spec = item->spec;
492
493                 for (i = 0; i < size; ++i)
494                         if ((spec[i] | mask[i]) != mask[i])
495                                 return -1;
496         }
497         if (item->spec && item->last) {
498                 uint8_t spec[size];
499                 uint8_t last[size];
500                 const uint8_t *apply = mask;
501                 unsigned int i;
502
503                 if (item->mask)
504                         apply = item->mask;
505                 for (i = 0; i < size; ++i) {
506                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
507                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
508                 }
509                 ret = memcmp(spec, last, size);
510         }
511         return ret;
512 }
513
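/*
 * Illustration of the checks above (assumed values): the supported VLAN mask
 * declared in mlx5_flow_items[] only covers the TCI field, so a VLAN item
 * whose spec sets bits in the TPID field makes (spec[i] | mask[i]) != mask[i]
 * true for those bytes and the item is rejected.
 */
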
514 /**
515  * Copy the RSS configuration from the user's; if rss_conf is NULL, use the
516  * driver's default one.
517  *
518  * @param priv
519  *   Pointer to private structure.
520  * @param parser
521  *   Internal parser structure.
522  * @param rss_conf
523  *   User RSS configuration to save.
524  *
525  * @return
526  *   0 on success, errno value on failure.
527  */
528 static int
529 priv_flow_convert_rss_conf(struct priv *priv __rte_unused,
530                            struct mlx5_flow_parse *parser,
531                            const struct rte_eth_rss_conf *rss_conf)
532 {
533         /*
534          * This function is also called at the beginning of
535          * priv_flow_convert_actions() to initialize the parser with the
536          * device default RSS configuration.
537          */
538         if (rss_conf) {
539                 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
540                         return EINVAL;
541                 if (rss_conf->rss_key_len != 40)
542                         return EINVAL;
543                 if (rss_conf->rss_key_len && rss_conf->rss_key) {
544                         parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
545                         memcpy(parser->rss_key, rss_conf->rss_key,
546                                rss_conf->rss_key_len);
547                         parser->rss_conf.rss_key = parser->rss_key;
548                 }
549                 parser->rss_conf.rss_hf = rss_conf->rss_hf;
550         }
551         return 0;
552 }
553
554 /**
555  * Extract and validate flow rule attributes.
556  *
557  * @param priv
558  *   Pointer to private structure.
559  * @param[in] attr
560  *   Flow rule attributes.
561  * @param[out] error
562  *   Perform verbose error reporting if not NULL.
563  * @param[in, out] parser
564  *   Internal parser structure.
565  *
566  * @return
567  *   0 on success, a negative errno value otherwise and rte_errno is set.
568  */
569 static int
570 priv_flow_convert_attributes(struct priv *priv __rte_unused,
571                              const struct rte_flow_attr *attr,
572                              struct rte_flow_error *error,
573                              struct mlx5_flow_parse *parser __rte_unused)
574 {
575         if (attr->group) {
576                 rte_flow_error_set(error, ENOTSUP,
577                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
578                                    NULL,
579                                    "groups are not supported");
580                 return -rte_errno;
581         }
582         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
583                 rte_flow_error_set(error, ENOTSUP,
584                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
585                                    NULL,
586                                    "priorities are not supported");
587                 return -rte_errno;
588         }
589         if (attr->egress) {
590                 rte_flow_error_set(error, ENOTSUP,
591                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
592                                    NULL,
593                                    "egress is not supported");
594                 return -rte_errno;
595         }
596         if (!attr->ingress) {
597                 rte_flow_error_set(error, ENOTSUP,
598                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
599                                    NULL,
600                                    "only ingress is supported");
601                 return -rte_errno;
602         }
603         return 0;
604 }
605
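/*
 * For instance, attr = { .ingress = 1 } is accepted, while a non-zero group,
 * an egress rule, or a priority other than 0 or MLX5_CTRL_FLOW_PRIORITY is
 * rejected with ENOTSUP.
 */
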
606 /**
607  * Extract the requested actions into the parser.
608  *
609  * @param priv
610  *   Pointer to private structure.
611  * @param[in] actions
612  *   Associated actions (list terminated by the END action).
613  * @param[out] error
614  *   Perform verbose error reporting if not NULL.
615  * @param[in, out] parser
616  *   Internal parser structure.
617  *
618  * @return
619  *   0 on success, a negative errno value otherwise and rte_errno is set.
620  */
621 static int
622 priv_flow_convert_actions(struct priv *priv,
623                           const struct rte_flow_action actions[],
624                           struct rte_flow_error *error,
625                           struct mlx5_flow_parse *parser)
626 {
627         /*
628          * Add the default RSS configuration required by Verbs to create a QP
629          * even when no RSS action is requested.
630          */
631         priv_flow_convert_rss_conf(priv, parser,
632                                    (const struct rte_eth_rss_conf *)
633                                    &priv->rss_conf);
634         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
635                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
636                         continue;
637                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
638                         parser->drop = 1;
639                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
640                         const struct rte_flow_action_queue *queue =
641                                 (const struct rte_flow_action_queue *)
642                                 actions->conf;
643                         uint16_t n;
644                         uint16_t found = 0;
645
646                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
647                                 goto exit_action_not_supported;
648                         for (n = 0; n < parser->queues_n; ++n) {
649                                 if (parser->queues[n] == queue->index) {
650                                         found = 1;
651                                         break;
652                                 }
653                         }
654                         if (parser->queues_n > 1 && !found) {
655                                 rte_flow_error_set(error, ENOTSUP,
656                                            RTE_FLOW_ERROR_TYPE_ACTION,
657                                            actions,
658                                            "queue action not in RSS queues");
659                                 return -rte_errno;
660                         }
661                         if (!found) {
662                                 parser->queues_n = 1;
663                                 parser->queues[0] = queue->index;
664                         }
665                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
666                         const struct rte_flow_action_rss *rss =
667                                 (const struct rte_flow_action_rss *)
668                                 actions->conf;
669                         uint16_t n;
670
671                         if (!rss || !rss->num) {
672                                 rte_flow_error_set(error, EINVAL,
673                                                    RTE_FLOW_ERROR_TYPE_ACTION,
674                                                    actions,
675                                                    "no valid queues");
676                                 return -rte_errno;
677                         }
678                         if (parser->queues_n == 1) {
679                                 uint16_t found = 0;
680
681                                 assert(parser->queues_n);
682                                 for (n = 0; n < rss->num; ++n) {
683                                         if (parser->queues[0] ==
684                                             rss->queue[n]) {
685                                                 found = 1;
686                                                 break;
687                                         }
688                                 }
689                                 if (!found) {
690                                         rte_flow_error_set(error, ENOTSUP,
691                                                    RTE_FLOW_ERROR_TYPE_ACTION,
692                                                    actions,
693                                                    "queue action not in RSS"
694                                                    " queues");
695                                         return -rte_errno;
696                                 }
697                         }
698                         for (n = 0; n < rss->num; ++n) {
699                                 if (rss->queue[n] >= priv->rxqs_n) {
700                                         rte_flow_error_set(error, EINVAL,
701                                                    RTE_FLOW_ERROR_TYPE_ACTION,
702                                                    actions,
703                                                    "queue id > number of"
704                                                    " queues");
705                                         return -rte_errno;
706                                 }
707                         }
708                         for (n = 0; n < rss->num; ++n)
709                                 parser->queues[n] = rss->queue[n];
710                         parser->queues_n = rss->num;
711                         if (priv_flow_convert_rss_conf(priv, parser,
712                                                        rss->rss_conf)) {
713                                 rte_flow_error_set(error, EINVAL,
714                                                    RTE_FLOW_ERROR_TYPE_ACTION,
715                                                    actions,
716                                                    "wrong RSS configuration");
717                                 return -rte_errno;
718                         }
719                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
720                         const struct rte_flow_action_mark *mark =
721                                 (const struct rte_flow_action_mark *)
722                                 actions->conf;
723
724                         if (!mark) {
725                                 rte_flow_error_set(error, EINVAL,
726                                                    RTE_FLOW_ERROR_TYPE_ACTION,
727                                                    actions,
728                                                    "mark must be defined");
729                                 return -rte_errno;
730                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
731                                 rte_flow_error_set(error, ENOTSUP,
732                                                    RTE_FLOW_ERROR_TYPE_ACTION,
733                                                    actions,
734                                                    "mark must be between 0"
735                                                    " and 16777199");
736                                 return -rte_errno;
737                         }
738                         parser->mark = 1;
739                         parser->mark_id = mark->id;
740                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
741                         parser->mark = 1;
742                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
743                            priv->config.flow_counter_en) {
744                         parser->count = 1;
745                 } else {
746                         goto exit_action_not_supported;
747                 }
748         }
749         if (parser->drop && parser->mark)
750                 parser->mark = 0;
751         if (!parser->queues_n && !parser->drop) {
752                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
753                                    NULL, "no valid action");
754                 return -rte_errno;
755         }
756         return 0;
757 exit_action_not_supported:
758         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
759                            actions, "action not supported");
760         return -rte_errno;
761 }
762
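/*
 * Hedged example (application side): an action list this parser accepts,
 * combining MARK and QUEUE. The mark identifier must stay below
 * MLX5_FLOW_MARK_MAX and the queue index below the number of configured Rx
 * queues:
 *
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * When an RSS action provides a key, priv_flow_convert_rss_conf() above also
 * requires it to be exactly 40 bytes long.
 */
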
763 /**
764  * Validate items.
765  *
766  * @param priv
767  *   Pointer to private structure.
768  * @param[in] items
769  *   Pattern specification (list terminated by the END pattern item).
770  * @param[out] error
771  *   Perform verbose error reporting if not NULL.
772  * @param[in, out] parser
773  *   Internal parser structure.
774  *
775  * @return
776  *   0 on success, a negative errno value otherwise and rte_errno is set.
777  */
778 static int
779 priv_flow_convert_items_validate(struct priv *priv __rte_unused,
780                                  const struct rte_flow_item items[],
781                                  struct rte_flow_error *error,
782                                  struct mlx5_flow_parse *parser)
783 {
784         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
785         unsigned int i;
786
787         /* Initialise the offsets to start after the verbs attribute. */
788         for (i = 0; i != hash_rxq_init_n; ++i)
789                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
790         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
791                 const struct mlx5_flow_items *token = NULL;
792                 unsigned int n;
793                 int err;
794
795                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
796                         continue;
797                 for (i = 0;
798                      cur_item->items &&
799                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
800                      ++i) {
801                         if (cur_item->items[i] == items->type) {
802                                 token = &mlx5_flow_items[items->type];
803                                 break;
804                         }
805                 }
806                 if (!token)
807                         goto exit_item_not_supported;
808                 cur_item = token;
809                 err = mlx5_flow_item_validate(items,
810                                               (const uint8_t *)cur_item->mask,
811                                               cur_item->mask_sz);
812                 if (err)
813                         goto exit_item_not_supported;
814                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
815                         if (parser->inner) {
816                                 rte_flow_error_set(error, ENOTSUP,
817                                                    RTE_FLOW_ERROR_TYPE_ITEM,
818                                                    items,
819                                                    "cannot recognize multiple"
820                                                    " VXLAN encapsulations");
821                                 return -rte_errno;
822                         }
823                         parser->inner = IBV_FLOW_SPEC_INNER;
824                 }
825                 if (parser->drop) {
826                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
827                 } else {
828                         for (n = 0; n != hash_rxq_init_n; ++n)
829                                 parser->queue[n].offset += cur_item->dst_sz;
830                 }
831         }
832         if (parser->drop) {
833                 parser->queue[HASH_RXQ_ETH].offset +=
834                         sizeof(struct ibv_flow_spec_action_drop);
835         }
836         if (parser->mark) {
837                 for (i = 0; i != hash_rxq_init_n; ++i)
838                         parser->queue[i].offset +=
839                                 sizeof(struct ibv_flow_spec_action_tag);
840         }
841         if (parser->count) {
842                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
843
844                 for (i = 0; i != hash_rxq_init_n; ++i)
845                         parser->queue[i].offset += size;
846         }
847         return 0;
848 exit_item_not_supported:
849         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
850                            items, "item not supported");
851         return -rte_errno;
852 }
853
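/*
 * Offset bookkeeping illustration: for an eth / ipv4 / udp pattern with a
 * MARK action, every parser->queue[i].offset computed above amounts to
 *
 *	sizeof(struct ibv_flow_attr) +
 *	sizeof(struct ibv_flow_spec_eth) +
 *	sizeof(struct ibv_flow_spec_ipv4_ext) +
 *	sizeof(struct ibv_flow_spec_tcp_udp) +
 *	sizeof(struct ibv_flow_spec_action_tag)
 *
 * and, possibly enlarged by priv_flow_convert_finalise() for layers the
 * pattern leaves implicit, becomes the buffer size requested from
 * priv_flow_convert_allocate().
 */
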
854 /**
855  * Allocate memory space to store verbs flow attributes.
856  *
857  * @param priv
858  *   Pointer to private structure.
859  * @param[in] priority
860  *   Flow priority.
861  * @param[in] size
862  *   Amount of bytes to allocate.
863  * @param[out] error
864  *   Perform verbose error reporting if not NULL.
865  *
866  * @return
867  *   A verbs flow attribute on success, NULL otherwise.
868  */
869 static struct ibv_flow_attr *
870 priv_flow_convert_allocate(struct priv *priv __rte_unused,
871                            unsigned int priority,
872                            unsigned int size,
873                            struct rte_flow_error *error)
874 {
875         struct ibv_flow_attr *ibv_attr;
876
877         ibv_attr = rte_calloc(__func__, 1, size, 0);
878         if (!ibv_attr) {
879                 rte_flow_error_set(error, ENOMEM,
880                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
881                                    NULL,
882                                    "cannot allocate verbs spec attributes.");
883                 return NULL;
884         }
885         ibv_attr->priority = priority;
886         return ibv_attr;
887 }
888
889 /**
890  * Finalise verbs flow attributes.
891  *
892  * @param priv
893  *   Pointer to private structure.
894  * @param[in, out] parser
895  *   Internal parser structure.
896  */
897 static void
898 priv_flow_convert_finalise(struct priv *priv __rte_unused,
899                            struct mlx5_flow_parse *parser)
900 {
901         const unsigned int ipv4 =
902                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
903         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
904         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
905         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
906         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
907         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
908         unsigned int i;
909
910         /* Remove any other flow not matching the pattern. */
911         if (parser->queues_n == 1) {
912                 for (i = 0; i != hash_rxq_init_n; ++i) {
913                         if (i == HASH_RXQ_ETH)
914                                 continue;
915                         rte_free(parser->queue[i].ibv_attr);
916                         parser->queue[i].ibv_attr = NULL;
917                 }
918                 return;
919         }
920         if (parser->layer == HASH_RXQ_ETH) {
921                 goto fill;
922         } else {
923                 /*
924                  * This layer becomes useless as the pattern defines
925                  * underlying layers.
926                  */
927                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
928                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
929         }
930         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
931         for (i = ohmin; i != (ohmax + 1); ++i) {
932                 if (!parser->queue[i].ibv_attr)
933                         continue;
934                 rte_free(parser->queue[i].ibv_attr);
935                 parser->queue[i].ibv_attr = NULL;
936         }
937         /* Remove impossible flow according to the RSS configuration. */
938         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
939             parser->rss_conf.rss_hf) {
940                 /* Remove any other flow. */
941                 for (i = hmin; i != (hmax + 1); ++i) {
942                         if ((i == parser->layer) ||
943                              (!parser->queue[i].ibv_attr))
944                                 continue;
945                         rte_free(parser->queue[i].ibv_attr);
946                         parser->queue[i].ibv_attr = NULL;
947                 }
948         } else if (!parser->queue[ip].ibv_attr) {
949                 /* No RSS possible with the current configuration. */
950                 parser->queues_n = 1;
951                 return;
952         }
953 fill:
954         /*
955          * Fill missing layers in verbs specifications, or compute the correct
956          * offset to allocate the memory space for the attributes and
957          * specifications.
958          */
959         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
960                 union {
961                         struct ibv_flow_spec_ipv4_ext ipv4;
962                         struct ibv_flow_spec_ipv6 ipv6;
963                         struct ibv_flow_spec_tcp_udp udp_tcp;
964                 } specs;
965                 void *dst;
966                 uint16_t size;
967
968                 if (i == parser->layer)
969                         continue;
970                 if (parser->layer == HASH_RXQ_ETH) {
971                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
972                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
973                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
974                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
975                                         .size = size,
976                                 };
977                         } else {
978                                 size = sizeof(struct ibv_flow_spec_ipv6);
979                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
980                                         .type = IBV_FLOW_SPEC_IPV6,
981                                         .size = size,
982                                 };
983                         }
984                         if (parser->queue[i].ibv_attr) {
985                                 dst = (void *)((uintptr_t)
986                                                parser->queue[i].ibv_attr +
987                                                parser->queue[i].offset);
988                                 memcpy(dst, &specs, size);
989                                 ++parser->queue[i].ibv_attr->num_of_specs;
990                         }
991                         parser->queue[i].offset += size;
992                 }
993                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
994                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
995                         size = sizeof(struct ibv_flow_spec_tcp_udp);
996                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
997                                 .type = ((i == HASH_RXQ_UDPV4 ||
998                                           i == HASH_RXQ_UDPV6) ?
999                                          IBV_FLOW_SPEC_UDP :
1000                                          IBV_FLOW_SPEC_TCP),
1001                                 .size = size,
1002                         };
1003                         if (parser->queue[i].ibv_attr) {
1004                                 dst = (void *)((uintptr_t)
1005                                                parser->queue[i].ibv_attr +
1006                                                parser->queue[i].offset);
1007                                 memcpy(dst, &specs, size);
1008                                 ++parser->queue[i].ibv_attr->num_of_specs;
1009                         }
1010                         parser->queue[i].offset += size;
1011                 }
1012         }
1013 }
1014
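/*
 * Worked example: for a pattern that stops at the Ethernet layer while the
 * RSS configuration enables IP, TCP and UDP hashing, the fill loop above
 * appends an empty (match-all) IPv4/IPv6 spec and, for the TCP/UDP hash
 * queue types, an empty ibv_flow_spec_tcp_udp to each allocated attribute,
 * so every expansion carries a complete protocol stack even though the
 * pattern never described those layers.
 */
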
1015 /**
1016  * Validate and convert a flow supported by the NIC.
1017  *
1018  * @param priv
1019  *   Pointer to private structure.
1020  * @param[in] attr
1021  *   Flow rule attributes.
1022  * @param[in] pattern
1023  *   Pattern specification (list terminated by the END pattern item).
1024  * @param[in] actions
1025  *   Associated actions (list terminated by the END action).
1026  * @param[out] error
1027  *   Perform verbose error reporting if not NULL.
1028  * @param[in, out] parser
1029  *   Internal parser structure.
1030  *
1031  * @return
1032  *   0 on success, a negative errno value otherwise and rte_errno is set.
1033  */
1034 static int
1035 priv_flow_convert(struct priv *priv,
1036                   const struct rte_flow_attr *attr,
1037                   const struct rte_flow_item items[],
1038                   const struct rte_flow_action actions[],
1039                   struct rte_flow_error *error,
1040                   struct mlx5_flow_parse *parser)
1041 {
1042         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1043         unsigned int i;
1044         int ret;
1045
1046         /* First step. Validate the attributes, items and actions. */
1047         *parser = (struct mlx5_flow_parse){
1048                 .create = parser->create,
1049                 .layer = HASH_RXQ_ETH,
1050                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1051         };
1052         ret = priv_flow_convert_attributes(priv, attr, error, parser);
1053         if (ret)
1054                 return ret;
1055         ret = priv_flow_convert_actions(priv, actions, error, parser);
1056         if (ret)
1057                 return ret;
1058         ret = priv_flow_convert_items_validate(priv, items, error, parser);
1059         if (ret)
1060                 return ret;
1061         priv_flow_convert_finalise(priv, parser);
1062         /*
1063          * Second step.
1064          * Allocate the memory space to store verbs specifications.
1065          */
1066         if (parser->drop) {
1067                 unsigned int priority =
1068                         attr->priority +
1069                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1070                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1071
1072                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1073                         priv_flow_convert_allocate(priv, priority,
1074                                                    offset, error);
1075                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1076                         return ENOMEM;
1077                 parser->queue[HASH_RXQ_ETH].offset =
1078                         sizeof(struct ibv_flow_attr);
1079         } else {
1080                 for (i = 0; i != hash_rxq_init_n; ++i) {
1081                         unsigned int priority =
1082                                 attr->priority +
1083                                 hash_rxq_init[i].flow_priority;
1084                         unsigned int offset;
1085
1086                         if (!(parser->rss_conf.rss_hf &
1087                               hash_rxq_init[i].dpdk_rss_hf) &&
1088                             (i != HASH_RXQ_ETH))
1089                                 continue;
1090                         offset = parser->queue[i].offset;
1091                         parser->queue[i].ibv_attr =
1092                                 priv_flow_convert_allocate(priv, priority,
1093                                                            offset, error);
1094                         if (!parser->queue[i].ibv_attr)
1095                                 goto exit_enomem;
1096                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1097                 }
1098         }
1099         /* Third step. Convert the items, filling the specifications. */
1100         parser->inner = 0;
1101         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1102                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1103                         continue;
1104                 cur_item = &mlx5_flow_items[items->type];
1105                 ret = cur_item->convert(items,
1106                                         (cur_item->default_mask ?
1107                                          cur_item->default_mask :
1108                                          cur_item->mask),
1109                                         parser);
1110                 if (ret) {
1111                         rte_flow_error_set(error, ret,
1112                                            RTE_FLOW_ERROR_TYPE_ITEM,
1113                                            items, "item not supported");
1114                         goto exit_free;
1115                 }
1116         }
1117         if (parser->mark)
1118                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1119         if (parser->count && parser->create) {
1120                 mlx5_flow_create_count(priv, parser);
1121                 if (!parser->cs)
1122                         goto exit_count_error;
1123         }
1124         /*
1125          * Last step. Complete the missing specifications to match the RSS
1126          * configuration.
1127          */
1128         if (!parser->drop) {
1129                 priv_flow_convert_finalise(priv, parser);
1130         } else {
1131                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1132                         attr->priority +
1133                         hash_rxq_init[parser->layer].flow_priority;
1134         }
1135 exit_free:
1136         /* Only verification is expected, all resources should be released. */
1137         if (!parser->create) {
1138                 for (i = 0; i != hash_rxq_init_n; ++i) {
1139                         if (parser->queue[i].ibv_attr) {
1140                                 rte_free(parser->queue[i].ibv_attr);
1141                                 parser->queue[i].ibv_attr = NULL;
1142                         }
1143                 }
1144         }
1145         return ret;
1146 exit_enomem:
1147         for (i = 0; i != hash_rxq_init_n; ++i) {
1148                 if (parser->queue[i].ibv_attr) {
1149                         rte_free(parser->queue[i].ibv_attr);
1150                         parser->queue[i].ibv_attr = NULL;
1151                 }
1152         }
1153         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1154                            NULL, "cannot allocate verbs spec attributes.");
1155         return ret;
1156 exit_count_error:
1157         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1158                            NULL, "cannot create counter.");
1159         return rte_errno;
1160 }
1161
1162 /**
1163  * Copy the created specification into each per-queue flow attribute.
1164  *
1165  * @param parser
1166  *   Internal parser structure.
1167  * @param src
1168  *   Created specification.
1169  * @param size
1170  *   Size in bytes of the specification to copy.
1171  */
1172 static void
1173 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1174                       unsigned int size)
1175 {
1176         unsigned int i;
1177         void *dst;
1178
1179         for (i = 0; i != hash_rxq_init_n; ++i) {
1180                 if (!parser->queue[i].ibv_attr)
1181                         continue;
1182                 /* Specification must be the same L3 type or none. */
1183                 if (parser->layer == HASH_RXQ_ETH ||
1184                     (hash_rxq_init[parser->layer].ip_version ==
1185                      hash_rxq_init[i].ip_version) ||
1186                     (hash_rxq_init[i].ip_version == 0)) {
1187                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1188                                         parser->queue[i].offset);
1189                         memcpy(dst, src, size);
1190                         ++parser->queue[i].ibv_attr->num_of_specs;
1191                         parser->queue[i].offset += size;
1192                 }
1193         }
1194 }
1195
1196 /**
1197  * Convert Ethernet item to Verbs specification.
1198  *
1199  * @param[in] item
1200  *   Item specification.
1201  * @param[in] default_mask
1202  *   Default bit-masks to use when item->mask is not provided.
1203  * @param[in, out] data
1204  *   User structure.
1205  */
1206 static int
1207 mlx5_flow_create_eth(const struct rte_flow_item *item,
1208                      const void *default_mask,
1209                      void *data)
1210 {
1211         const struct rte_flow_item_eth *spec = item->spec;
1212         const struct rte_flow_item_eth *mask = item->mask;
1213         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1214         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1215         struct ibv_flow_spec_eth eth = {
1216                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1217                 .size = eth_size,
1218         };
1219
1220         /* Don't update layer for the inner pattern. */
1221         if (!parser->inner)
1222                 parser->layer = HASH_RXQ_ETH;
1223         if (spec) {
1224                 unsigned int i;
1225
1226                 if (!mask)
1227                         mask = default_mask;
1228                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1229                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1230                 eth.val.ether_type = spec->type;
1231                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1232                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1233                 eth.mask.ether_type = mask->type;
1234                 /* Remove unwanted bits from values. */
1235                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1236                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1237                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1238                 }
1239                 eth.val.ether_type &= eth.mask.ether_type;
1240         }
1241         mlx5_flow_create_copy(parser, &eth, eth_size);
1242         return 0;
1243 }
1244
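/*
 * For example (assumed values), a spec with type 0x0800 (IPv4, big endian)
 * and a full 0xffff type mask keeps eth.val.ether_type == 0x0800 after the
 * trimming above, while a zero mask clears it, so only bits covered by the
 * mask are ever handed to the device.
 */
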
1245 /**
1246  * Convert VLAN item to Verbs specification.
1247  *
1248  * @param[in] item
1249  *   Item specification.
1250  * @param[in] default_mask
1251  *   Default bit-masks to use when item->mask is not provided.
1252  * @param[in, out] data
1253  *   User structure.
1254  */
1255 static int
1256 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1257                       const void *default_mask,
1258                       void *data)
1259 {
1260         const struct rte_flow_item_vlan *spec = item->spec;
1261         const struct rte_flow_item_vlan *mask = item->mask;
1262         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1263         struct ibv_flow_spec_eth *eth;
1264         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1265
1266         if (spec) {
1267                 unsigned int i;
1268                 if (!mask)
1269                         mask = default_mask;
1270
1271                 for (i = 0; i != hash_rxq_init_n; ++i) {
1272                         if (!parser->queue[i].ibv_attr)
1273                                 continue;
1274
1275                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1276                                        parser->queue[i].offset - eth_size);
1277                         eth->val.vlan_tag = spec->tci;
1278                         eth->mask.vlan_tag = mask->tci;
1279                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1280                 }
1281         }
1282         return 0;
1283 }
1284
1285 /**
1286  * Convert IPv4 item to Verbs specification.
1287  *
1288  * @param[in] item
1289  *   Item specification.
1290  * @param[in] default_mask
1291  *   Default bit-masks to use when item->mask is not provided.
1292  * @param[in, out] data
1293  *   User structure.
1294  */
1295 static int
1296 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1297                       const void *default_mask,
1298                       void *data)
1299 {
1300         const struct rte_flow_item_ipv4 *spec = item->spec;
1301         const struct rte_flow_item_ipv4 *mask = item->mask;
1302         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1303         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1304         struct ibv_flow_spec_ipv4_ext ipv4 = {
1305                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1306                 .size = ipv4_size,
1307         };
1308
1309         /* Don't update layer for the inner pattern. */
1310         if (!parser->inner)
1311                 parser->layer = HASH_RXQ_IPV4;
1312         if (spec) {
1313                 if (!mask)
1314                         mask = default_mask;
1315                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1316                         .src_ip = spec->hdr.src_addr,
1317                         .dst_ip = spec->hdr.dst_addr,
1318                         .proto = spec->hdr.next_proto_id,
1319                         .tos = spec->hdr.type_of_service,
1320                 };
1321                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1322                         .src_ip = mask->hdr.src_addr,
1323                         .dst_ip = mask->hdr.dst_addr,
1324                         .proto = mask->hdr.next_proto_id,
1325                         .tos = mask->hdr.type_of_service,
1326                 };
1327                 /* Remove unwanted bits from values. */
1328                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1329                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1330                 ipv4.val.proto &= ipv4.mask.proto;
1331                 ipv4.val.tos &= ipv4.mask.tos;
1332         }
1333         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1334         return 0;
1335 }
1336
1337 /**
1338  * Convert IPv6 item to Verbs specification.
1339  *
1340  * @param[in] item
1341  *   Item specification.
1342  * @param[in] default_mask
1343  *   Default bit-masks to use when item->mask is not provided.
1344  * @param[in, out] data
1345  *   User structure.
1346  */
1347 static int
1348 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1349                       const void *default_mask,
1350                       void *data)
1351 {
1352         const struct rte_flow_item_ipv6 *spec = item->spec;
1353         const struct rte_flow_item_ipv6 *mask = item->mask;
1354         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1355         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1356         struct ibv_flow_spec_ipv6 ipv6 = {
1357                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1358                 .size = ipv6_size,
1359         };
1360
1361         /* Don't update layer for the inner pattern. */
1362         if (!parser->inner)
1363                 parser->layer = HASH_RXQ_IPV6;
1364         if (spec) {
1365                 unsigned int i;
1366                 uint32_t vtc_flow_val;
1367                 uint32_t vtc_flow_mask;
1368
1369                 if (!mask)
1370                         mask = default_mask;
1371                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1372                        RTE_DIM(ipv6.val.src_ip));
1373                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1374                        RTE_DIM(ipv6.val.dst_ip));
1375                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1376                        RTE_DIM(ipv6.mask.src_ip));
1377                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1378                        RTE_DIM(ipv6.mask.dst_ip));
1379                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1380                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1381                 ipv6.val.flow_label =
1382                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1383                                          IPV6_HDR_FL_SHIFT);
1384                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1385                                          IPV6_HDR_TC_SHIFT;
1386                 ipv6.val.next_hdr = spec->hdr.proto;
1387                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1388                 ipv6.mask.flow_label =
1389                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1390                                          IPV6_HDR_FL_SHIFT);
1391                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1392                                           IPV6_HDR_TC_SHIFT;
1393                 ipv6.mask.next_hdr = mask->hdr.proto;
1394                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1395                 /* Remove unwanted bits from values. */
1396                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1397                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1398                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1399                 }
1400                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1401                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1402                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1403                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1404         }
1405         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1406         return 0;
1407 }
1408
1409 /**
1410  * Convert UDP item to Verbs specification.
1411  *
1412  * @param[in] item
1413  *   Item specification.
1414  * @param[in] default_mask
1415  *   Default bit-masks to use when item->mask is not provided.
1416  * @param[in, out] data
1417  *   User structure.
1418  */
1419 static int
1420 mlx5_flow_create_udp(const struct rte_flow_item *item,
1421                      const void *default_mask,
1422                      void *data)
1423 {
1424         const struct rte_flow_item_udp *spec = item->spec;
1425         const struct rte_flow_item_udp *mask = item->mask;
1426         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1427         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1428         struct ibv_flow_spec_tcp_udp udp = {
1429                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1430                 .size = udp_size,
1431         };
1432
1433         /* Don't update layer for the inner pattern. */
1434         if (!parser->inner) {
1435                 if (parser->layer == HASH_RXQ_IPV4)
1436                         parser->layer = HASH_RXQ_UDPV4;
1437                 else
1438                         parser->layer = HASH_RXQ_UDPV6;
1439         }
1440         if (spec) {
1441                 if (!mask)
1442                         mask = default_mask;
1443                 udp.val.dst_port = spec->hdr.dst_port;
1444                 udp.val.src_port = spec->hdr.src_port;
1445                 udp.mask.dst_port = mask->hdr.dst_port;
1446                 udp.mask.src_port = mask->hdr.src_port;
1447                 /* Remove unwanted bits from values. */
1448                 udp.val.src_port &= udp.mask.src_port;
1449                 udp.val.dst_port &= udp.mask.dst_port;
1450         }
1451         mlx5_flow_create_copy(parser, &udp, udp_size);
1452         return 0;
1453 }
1454
1455 /**
1456  * Convert TCP item to Verbs specification.
1457  *
1458  * @param[in] item
1459  *   Item specification.
1460  * @param[in] default_mask
1461  *   Default bit-masks to use when item->mask is not provided.
1462  * @param[in, out] data
1463  *   User structure.
1464  */
1465 static int
1466 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1467                      const void *default_mask,
1468                      void *data)
1469 {
1470         const struct rte_flow_item_tcp *spec = item->spec;
1471         const struct rte_flow_item_tcp *mask = item->mask;
1472         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1473         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1474         struct ibv_flow_spec_tcp_udp tcp = {
1475                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1476                 .size = tcp_size,
1477         };
1478
1479         /* Don't update layer for the inner pattern. */
1480         if (!parser->inner) {
1481                 if (parser->layer == HASH_RXQ_IPV4)
1482                         parser->layer = HASH_RXQ_TCPV4;
1483                 else
1484                         parser->layer = HASH_RXQ_TCPV6;
1485         }
1486         if (spec) {
1487                 if (!mask)
1488                         mask = default_mask;
1489                 tcp.val.dst_port = spec->hdr.dst_port;
1490                 tcp.val.src_port = spec->hdr.src_port;
1491                 tcp.mask.dst_port = mask->hdr.dst_port;
1492                 tcp.mask.src_port = mask->hdr.src_port;
1493                 /* Remove unwanted bits from values. */
1494                 tcp.val.src_port &= tcp.mask.src_port;
1495                 tcp.val.dst_port &= tcp.mask.dst_port;
1496         }
1497         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1498         return 0;
1499 }
1500
1501 /**
1502  * Convert VXLAN item to Verbs specification.
1503  *
1504  * @param[in] item
1505  *   Item specification.
1506  * @param[in] default_mask
1507  *   Default bit-masks to use when item->mask is not provided.
1508  * @param[in, out] data
1509  *   User structure.
1510  */
1511 static int
1512 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1513                        const void *default_mask,
1514                        void *data)
1515 {
1516         const struct rte_flow_item_vxlan *spec = item->spec;
1517         const struct rte_flow_item_vxlan *mask = item->mask;
1518         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1519         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1520         struct ibv_flow_spec_tunnel vxlan = {
1521                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1522                 .size = size,
1523         };
1524         union vni {
1525                 uint32_t vlan_id;
1526                 uint8_t vni[4];
1527         } id;
1528
1529         id.vni[0] = 0;
1530         parser->inner = IBV_FLOW_SPEC_INNER;
1531         if (spec) {
1532                 if (!mask)
1533                         mask = default_mask;
1534                 memcpy(&id.vni[1], spec->vni, 3);
1535                 vxlan.val.tunnel_id = id.vlan_id;
1536                 memcpy(&id.vni[1], mask->vni, 3);
1537                 vxlan.mask.tunnel_id = id.vlan_id;
1538                 /* Remove unwanted bits from values. */
1539                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1540         }
1541         /*
1542          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
1543          * the only layer in the Verbs specification, it is interpreted as a
1544          * wildcard and all packets will match the rule; if it follows a full
1545          * stack layer (e.g. eth / ipv4 / udp), all packets matching those
1546          * layers will also match this rule.
1547          * To avoid such a situation, VNI 0 is currently refused.
1548          */
1549         if (!vxlan.val.tunnel_id)
1550                 return EINVAL;
1551         mlx5_flow_create_copy(parser, &vxlan, size);
1552         return 0;
1553 }
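
/*
 * Usage sketch (editorial example, not part of the driver): since a zero VNI
 * is refused above, an application matching VXLAN traffic must provide a
 * non-zero 24-bit VNI in the item spec, typically appended after
 * eth / ipv4 / udp in the pattern. The function name and VNI value below are
 * illustrative only.
 */
static void __rte_unused
example_vxlan_item(struct rte_flow_item *item)
{
        static const struct rte_flow_item_vxlan spec = {
                .vni = { 0x00, 0x00, 0x2a }, /* VNI 42, stored big-endian. */
        };
        static const struct rte_flow_item_vxlan mask = {
                .vni = { 0xff, 0xff, 0xff }, /* Match the whole VNI. */
        };

        item->type = RTE_FLOW_ITEM_TYPE_VXLAN;
        item->spec = &spec;
        item->last = NULL;
        item->mask = &mask;
}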
1554
1555 /**
1556  * Convert mark/flag action to Verbs specification.
1557  *
1558  * @param parser
1559  *   Internal parser structure.
1560  * @param mark_id
1561  *   Mark identifier.
1562  */
1563 static int
1564 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1565 {
1566         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1567         struct ibv_flow_spec_action_tag tag = {
1568                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1569                 .size = size,
1570                 .tag_id = mlx5_flow_mark_set(mark_id),
1571         };
1572
1573         assert(parser->mark);
1574         mlx5_flow_create_copy(parser, &tag, size);
1575         return 0;
1576 }
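
/*
 * Usage sketch (editorial example): the MARK action handled above is built on
 * the application side as below; packets hitting the rule are expected to
 * carry the identifier back in mbuf->hash.fdir.hi with PKT_RX_FDIR_ID set in
 * ol_flags. The function name and identifier are illustrative only.
 */
static void __rte_unused
example_mark_action(struct rte_flow_action *action)
{
        static const struct rte_flow_action_mark mark = {
                .id = 0xcafe, /* Arbitrary application tag. */
        };

        action->type = RTE_FLOW_ACTION_TYPE_MARK;
        action->conf = &mark;
}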
1577
1578 /**
1579  * Convert count action to Verbs specification.
1580  *
1581  * @param priv
1582  *   Pointer to private structure.
1583  * @param parser
1584  *   Pointer to MLX5 flow parser structure.
1585  *
1586  * @return
1587  *   0 on success, errno value on failure.
1588  */
1589 static int
1590 mlx5_flow_create_count(struct priv *priv __rte_unused,
1591                        struct mlx5_flow_parse *parser __rte_unused)
1592 {
1593 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1594         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1595         struct ibv_counter_set_init_attr init_attr = {0};
1596         struct ibv_flow_spec_counter_action counter = {
1597                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1598                 .size = size,
1599                 .counter_set_handle = 0,
1600         };
1601
1602         init_attr.counter_set_id = 0;
1603         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1604         if (!parser->cs)
1605                 return EINVAL;
1606         counter.counter_set_handle = parser->cs->handle;
1607         mlx5_flow_create_copy(parser, &counter, size);
1608 #endif
1609         return 0;
1610 }
1611
1612 /**
1613  * Complete flow rule creation with a drop queue.
1614  *
1615  * @param priv
1616  *   Pointer to private structure.
1617  * @param parser
1618  *   Internal parser structure.
1619  * @param flow
1620  *   Pointer to the rte_flow.
1621  * @param[out] error
1622  *   Perform verbose error reporting if not NULL.
1623  *
1624  * @return
1625  *   0 on success, errno value on failure.
1626  */
1627 static int
1628 priv_flow_create_action_queue_drop(struct priv *priv,
1629                                    struct mlx5_flow_parse *parser,
1630                                    struct rte_flow *flow,
1631                                    struct rte_flow_error *error)
1632 {
1633         struct ibv_flow_spec_action_drop *drop;
1634         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1635         int err = 0;
1636
1637         assert(priv->pd);
1638         assert(priv->ctx);
1639         flow->drop = 1;
1640         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1641                         parser->queue[HASH_RXQ_ETH].offset);
1642         *drop = (struct ibv_flow_spec_action_drop){
1643                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1644                         .size = size,
1645         };
1646         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1647         parser->queue[HASH_RXQ_ETH].offset += size;
1648         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1649                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1650         if (parser->count)
1651                 flow->cs = parser->cs;
1652         if (!priv->dev->data->dev_started)
1653                 return 0;
1654         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1655         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1656                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1657                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1658         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1659                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1660                                    NULL, "flow rule creation failure");
1661                 err = ENOMEM;
1662                 goto error;
1663         }
1664         return 0;
1665 error:
1666         assert(flow);
1667         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1668                 claim_zero(mlx5_glue->destroy_flow
1669                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1670                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1671         }
1672         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1673                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1674                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1675         }
1676         if (flow->cs) {
1677                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1678                 flow->cs = NULL;
1679                 parser->cs = NULL;
1680         }
1681         return err;
1682 }
1683
1684 /**
1685  * Create hash Rx queues when RSS is enabled.
1686  *
1687  * @param priv
1688  *   Pointer to private structure.
1689  * @param parser
1690  *   Internal parser structure.
1691  * @param flow
1692  *   Pointer to the rte_flow.
1693  * @param[out] error
1694  *   Perform verbose error reporting if not NULL.
1695  *
1696  * @return
1697  *   0 on success, an errno value otherwise and rte_errno is set.
1698  */
1699 static int
1700 priv_flow_create_action_queue_rss(struct priv *priv,
1701                                   struct mlx5_flow_parse *parser,
1702                                   struct rte_flow *flow,
1703                                   struct rte_flow_error *error)
1704 {
1705         unsigned int i;
1706
1707         for (i = 0; i != hash_rxq_init_n; ++i) {
1708                 uint64_t hash_fields;
1709
1710                 if (!parser->queue[i].ibv_attr)
1711                         continue;
1712                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1713                 parser->queue[i].ibv_attr = NULL;
1714                 hash_fields = hash_rxq_init[i].hash_fields;
1715                 if (!priv->dev->data->dev_started)
1716                         continue;
1717                 flow->frxq[i].hrxq =
1718                         mlx5_priv_hrxq_get(priv,
1719                                            parser->rss_conf.rss_key,
1720                                            parser->rss_conf.rss_key_len,
1721                                            hash_fields,
1722                                            parser->queues,
1723                                            parser->queues_n);
1724                 if (flow->frxq[i].hrxq)
1725                         continue;
1726                 flow->frxq[i].hrxq =
1727                         mlx5_priv_hrxq_new(priv,
1728                                            parser->rss_conf.rss_key,
1729                                            parser->rss_conf.rss_key_len,
1730                                            hash_fields,
1731                                            parser->queues,
1732                                            parser->queues_n);
1733                 if (!flow->frxq[i].hrxq) {
1734                         rte_flow_error_set(error, ENOMEM,
1735                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1736                                            NULL, "cannot create hash rxq");
1737                         return ENOMEM;
1738                 }
1739         }
1740         return 0;
1741 }
1742
1743 /**
1744  * Complete flow rule creation.
1745  *
1746  * @param priv
1747  *   Pointer to private structure.
1748  * @param parser
1749  *   Internal parser structure.
1750  * @param flow
1751  *   Pointer to the rte_flow.
1752  * @param[out] error
1753  *   Perform verbose error reporting if not NULL.
1754  *
1755  * @return
1756  *   0 on success, an errno value otherwise and rte_errno is set.
1757  */
1758 static int
1759 priv_flow_create_action_queue(struct priv *priv,
1760                               struct mlx5_flow_parse *parser,
1761                               struct rte_flow *flow,
1762                               struct rte_flow_error *error)
1763 {
1764         int err = 0;
1765         unsigned int i;
1766         unsigned int flows_n = 0;
1767
1768         assert(priv->pd);
1769         assert(priv->ctx);
1770         assert(!parser->drop);
1771         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1772         if (err)
1773                 goto error;
1774         if (parser->count)
1775                 flow->cs = parser->cs;
1776         if (!priv->dev->data->dev_started)
1777                 return 0;
1778         for (i = 0; i != hash_rxq_init_n; ++i) {
1779                 if (!flow->frxq[i].hrxq)
1780                         continue;
1781                 flow->frxq[i].ibv_flow =
1782                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1783                                                flow->frxq[i].ibv_attr);
1784                 if (!flow->frxq[i].ibv_flow) {
1785                         rte_flow_error_set(error, ENOMEM,
1786                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1787                                            NULL, "flow rule creation failure");
1788                         err = ENOMEM;
1789                         goto error;
1790                 }
1791                 ++flows_n;
1792                 DEBUG("%p type %d QP %p ibv_flow %p",
1793                       (void *)flow, i,
1794                       (void *)flow->frxq[i].hrxq,
1795                       (void *)flow->frxq[i].ibv_flow);
1796         }
1797         if (!flows_n) {
1798                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1799                                    NULL, "internal error in flow creation");
1800                 goto error;
1801         }
1802         for (i = 0; i != parser->queues_n; ++i) {
1803                 struct mlx5_rxq_data *q =
1804                         (*priv->rxqs)[parser->queues[i]];
1805
1806                 q->mark |= parser->mark;
1807         }
1808         return 0;
1809 error:
1810         assert(flow);
1811         for (i = 0; i != hash_rxq_init_n; ++i) {
1812                 if (flow->frxq[i].ibv_flow) {
1813                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1814
1815                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1816                 }
1817                 if (flow->frxq[i].hrxq)
1818                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1819                 if (flow->frxq[i].ibv_attr)
1820                         rte_free(flow->frxq[i].ibv_attr);
1821         }
1822         if (flow->cs) {
1823                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1824                 flow->cs = NULL;
1825                 parser->cs = NULL;
1826         }
1827         return err;
1828 }
1829
1830 /**
1831  * Convert a flow.
1832  *
1833  * @param priv
1834  *   Pointer to private structure.
1835  * @param list
1836  *   Pointer to a TAILQ flow list.
1837  * @param[in] attr
1838  *   Flow rule attributes.
1839  * @param[in] pattern
1840  *   Pattern specification (list terminated by the END pattern item).
1841  * @param[in] actions
1842  *   Associated actions (list terminated by the END action).
1843  * @param[out] error
1844  *   Perform verbose error reporting if not NULL.
1845  *
1846  * @return
1847  *   A flow on success, NULL otherwise.
1848  */
1849 static struct rte_flow *
1850 priv_flow_create(struct priv *priv,
1851                  struct mlx5_flows *list,
1852                  const struct rte_flow_attr *attr,
1853                  const struct rte_flow_item items[],
1854                  const struct rte_flow_action actions[],
1855                  struct rte_flow_error *error)
1856 {
1857         struct mlx5_flow_parse parser = { .create = 1, };
1858         struct rte_flow *flow = NULL;
1859         unsigned int i;
1860         int err;
1861
1862         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1863         if (err)
1864                 goto exit;
1865         flow = rte_calloc(__func__, 1,
1866                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1867                           0);
1868         if (!flow) {
1869                 rte_flow_error_set(error, ENOMEM,
1870                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1871                                    NULL,
1872                                    "cannot allocate flow memory");
1873                 return NULL;
1874         }
1875         /* Copy queues configuration. */
1876         flow->queues = (uint16_t (*)[])(flow + 1);
1877         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1878         flow->queues_n = parser.queues_n;
1879         flow->mark = parser.mark;
1880         /* Copy RSS configuration. */
1881         flow->rss_conf = parser.rss_conf;
1882         flow->rss_conf.rss_key = flow->rss_key;
1883         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1884         /* Finalise the flow. */
1885         if (parser.drop)
1886                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1887                                                          error);
1888         else
1889                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1890         if (err)
1891                 goto exit;
1892         TAILQ_INSERT_TAIL(list, flow, next);
1893         DEBUG("Flow created %p", (void *)flow);
1894         return flow;
1895 exit:
1896         ERROR("flow creation error: %s", error->message);
1897         for (i = 0; i != hash_rxq_init_n; ++i) {
1898                 if (parser.queue[i].ibv_attr)
1899                         rte_free(parser.queue[i].ibv_attr);
1900         }
1901         rte_free(flow);
1902         return NULL;
1903 }
1904
1905 /**
1906  * Validate a flow supported by the NIC.
1907  *
1908  * @see rte_flow_validate()
1909  * @see rte_flow_ops
1910  */
1911 int
1912 mlx5_flow_validate(struct rte_eth_dev *dev,
1913                    const struct rte_flow_attr *attr,
1914                    const struct rte_flow_item items[],
1915                    const struct rte_flow_action actions[],
1916                    struct rte_flow_error *error)
1917 {
1918         struct priv *priv = dev->data->dev_private;
1919         int ret;
1920         struct mlx5_flow_parse parser = { .create = 0, };
1921
1922         priv_lock(priv);
1923         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1924         priv_unlock(priv);
1925         return ret;
1926 }
1927
1928 /**
1929  * Create a flow.
1930  *
1931  * @see rte_flow_create()
1932  * @see rte_flow_ops
1933  */
1934 struct rte_flow *
1935 mlx5_flow_create(struct rte_eth_dev *dev,
1936                  const struct rte_flow_attr *attr,
1937                  const struct rte_flow_item items[],
1938                  const struct rte_flow_action actions[],
1939                  struct rte_flow_error *error)
1940 {
1941         struct priv *priv = dev->data->dev_private;
1942         struct rte_flow *flow;
1943
1944         priv_lock(priv);
1945         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1946                                 error);
1947         priv_unlock(priv);
1948         return flow;
1949 }
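
/*
 * Usage sketch (editorial example): a minimal application-side call reaching
 * the entry point above through the generic API, steering UDP traffic
 * destined to port 53 into Rx queue 3. Port id, queue index and port number
 * are illustrative assumptions.
 */
static int __rte_unused
example_flow_create(uint16_t port_id)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item_udp udp_spec = {
                .hdr = { .dst_port = rte_cpu_to_be_16(53) },
        };
        struct rte_flow_item_udp udp_mask = {
                .hdr = { .dst_port = rte_cpu_to_be_16(0xffff) },
        };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                {
                        .type = RTE_FLOW_ITEM_TYPE_UDP,
                        .spec = &udp_spec,
                        .mask = &udp_mask,
                },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = 3 };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;
        struct rte_flow *flow;

        /* Optional dry run before committing the rule to the device. */
        if (rte_flow_validate(port_id, &attr, pattern, actions, &error))
                return -1;
        flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
        return flow ? 0 : -1;
}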
1950
1951 /**
1952  * Destroy a flow.
1953  *
1954  * @param priv
1955  *   Pointer to private structure.
1956  * @param list
1957  *   Pointer to a TAILQ flow list.
1958  * @param[in] flow
1959  *   Flow to destroy.
1960  */
1961 static void
1962 priv_flow_destroy(struct priv *priv,
1963                   struct mlx5_flows *list,
1964                   struct rte_flow *flow)
1965 {
1966         unsigned int i;
1967
1968         if (flow->drop || !flow->mark)
1969                 goto free;
1970         for (i = 0; i != flow->queues_n; ++i) {
1971                 struct rte_flow *tmp;
1972                 int mark = 0;
1973
1974                 /*
1975                  * To remove the mark from the queue, the queue must not be
1976                  * present in any other marked flow (RSS or not).
1977                  */
1978                 TAILQ_FOREACH(tmp, list, next) {
1979                         unsigned int j;
1980                         uint16_t *tqs = NULL;
1981                         uint16_t tq_n = 0;
1982
1983                         if (!tmp->mark)
1984                                 continue;
1985                         for (j = 0; j != hash_rxq_init_n; ++j) {
1986                                 if (!tmp->frxq[j].hrxq)
1987                                         continue;
1988                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1989                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1990                         }
1991                         if (!tq_n)
1992                                 continue;
1993                         for (j = 0; (j != tq_n) && !mark; j++)
1994                                 if (tqs[j] == (*flow->queues)[i])
1995                                         mark = 1;
1996                 }
1997                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1998         }
1999 free:
2000         if (flow->drop) {
2001                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2002                         claim_zero(mlx5_glue->destroy_flow
2003                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2004                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2005         } else {
2006                 for (i = 0; i != hash_rxq_init_n; ++i) {
2007                         struct mlx5_flow *frxq = &flow->frxq[i];
2008
2009                         if (frxq->ibv_flow)
2010                                 claim_zero(mlx5_glue->destroy_flow
2011                                            (frxq->ibv_flow));
2012                         if (frxq->hrxq)
2013                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2014                         if (frxq->ibv_attr)
2015                                 rte_free(frxq->ibv_attr);
2016                 }
2017         }
2018         if (flow->cs) {
2019                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2020                 flow->cs = NULL;
2021         }
2022         TAILQ_REMOVE(list, flow, next);
2023         DEBUG("Flow destroyed %p", (void *)flow);
2024         rte_free(flow);
2025 }
2026
2027 /**
2028  * Destroy all flows.
2029  *
2030  * @param priv
2031  *   Pointer to private structure.
2032  * @param list
2033  *   Pointer to a TAILQ flow list.
2034  */
2035 void
2036 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2037 {
2038         while (!TAILQ_EMPTY(list)) {
2039                 struct rte_flow *flow;
2040
2041                 flow = TAILQ_FIRST(list);
2042                 priv_flow_destroy(priv, list, flow);
2043         }
2044 }
2045
2046 /**
2047  * Create drop queue.
2048  *
2049  * @param priv
2050  *   Pointer to private structure.
2051  *
2052  * @return
2053  *   0 on success, -1 on failure.
2054  */
2055 int
2056 priv_flow_create_drop_queue(struct priv *priv)
2057 {
2058         struct mlx5_hrxq_drop *fdq = NULL;
2059
2060         assert(priv->pd);
2061         assert(priv->ctx);
2062         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2063         if (!fdq) {
2064                 WARN("cannot allocate memory for drop queue");
2065                 goto error;
2066         }
2067         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2068         if (!fdq->cq) {
2069                 WARN("cannot allocate CQ for drop queue");
2070                 goto error;
2071         }
2072         fdq->wq = mlx5_glue->create_wq
2073                 (priv->ctx,
2074                  &(struct ibv_wq_init_attr){
2075                         .wq_type = IBV_WQT_RQ,
2076                         .max_wr = 1,
2077                         .max_sge = 1,
2078                         .pd = priv->pd,
2079                         .cq = fdq->cq,
2080                  });
2081         if (!fdq->wq) {
2082                 WARN("cannot allocate WQ for drop queue");
2083                 goto error;
2084         }
2085         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2086                 (priv->ctx,
2087                  &(struct ibv_rwq_ind_table_init_attr){
2088                         .log_ind_tbl_size = 0,
2089                         .ind_tbl = &fdq->wq,
2090                         .comp_mask = 0,
2091                  });
2092         if (!fdq->ind_table) {
2093                 WARN("cannot allocate indirection table for drop queue");
2094                 goto error;
2095         }
2096         fdq->qp = mlx5_glue->create_qp_ex
2097                 (priv->ctx,
2098                  &(struct ibv_qp_init_attr_ex){
2099                         .qp_type = IBV_QPT_RAW_PACKET,
2100                         .comp_mask =
2101                                 IBV_QP_INIT_ATTR_PD |
2102                                 IBV_QP_INIT_ATTR_IND_TABLE |
2103                                 IBV_QP_INIT_ATTR_RX_HASH,
2104                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2105                                 .rx_hash_function =
2106                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2107                                 .rx_hash_key_len = rss_hash_default_key_len,
2108                                 .rx_hash_key = rss_hash_default_key,
2109                                 .rx_hash_fields_mask = 0,
2110                                 },
2111                         .rwq_ind_tbl = fdq->ind_table,
2112                         .pd = priv->pd
2113                  });
2114         if (!fdq->qp) {
2115                 WARN("cannot allocate QP for drop queue");
2116                 goto error;
2117         }
2118         priv->flow_drop_queue = fdq;
2119         return 0;
2120 error:
2121         if (fdq && fdq->qp)
2122                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2123         if (fdq && fdq->ind_table)
2124                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2125         if (fdq && fdq->wq)
2126                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2127         if (fdq && fdq->cq)
2128                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2129         if (fdq)
2130                 rte_free(fdq);
2131         priv->flow_drop_queue = NULL;
2132         return -1;
2133 }
2134
2135 /**
2136  * Delete drop queue.
2137  *
2138  * @param priv
2139  *   Pointer to private structure.
2140  */
2141 void
2142 priv_flow_delete_drop_queue(struct priv *priv)
2143 {
2144         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2145
2146         if (!fdq)
2147                 return;
2148         if (fdq->qp)
2149                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2150         if (fdq->ind_table)
2151                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2152         if (fdq->wq)
2153                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2154         if (fdq->cq)
2155                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2156         rte_free(fdq);
2157         priv->flow_drop_queue = NULL;
2158 }
2159
2160 /**
2161  * Remove all flows from the NIC, keeping them in the flow list.
2162  *
2163  * @param priv
2164  *   Pointer to private structure.
2165  * @param list
2166  *   Pointer to a TAILQ flow list.
2167  */
2168 void
2169 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2170 {
2171         struct rte_flow *flow;
2172
2173         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2174                 unsigned int i;
2175                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2176
2177                 if (flow->drop) {
2178                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2179                                 continue;
2180                         claim_zero(mlx5_glue->destroy_flow
2181                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2182                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2183                         DEBUG("Flow %p removed", (void *)flow);
2184                         /* Next flow. */
2185                         continue;
2186                 }
2187                 /* Verify the flow has not already been cleaned. */
2188                 for (i = 0; i != hash_rxq_init_n; ++i) {
2189                         if (!flow->frxq[i].ibv_flow)
2190                                 continue;
2191                         /*
2192                          * Indirection table may be necessary to remove the
2193                          * flags in the Rx queues.
2194                          * This helps to speed-up the process by avoiding
2195                          * another loop.
2196                          */
2197                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2198                         break;
2199                 }
2200                 if (i == hash_rxq_init_n)
2201                         return;
2202                 if (flow->mark) {
2203                         assert(ind_tbl);
2204                         for (i = 0; i != ind_tbl->queues_n; ++i)
2205                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2206                 }
2207                 for (i = 0; i != hash_rxq_init_n; ++i) {
2208                         if (!flow->frxq[i].ibv_flow)
2209                                 continue;
2210                         claim_zero(mlx5_glue->destroy_flow
2211                                    (flow->frxq[i].ibv_flow));
2212                         flow->frxq[i].ibv_flow = NULL;
2213                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2214                         flow->frxq[i].hrxq = NULL;
2215                 }
2216                 DEBUG("Flow %p removed", (void *)flow);
2217         }
2218 }
2219
2220 /**
2221  * Add all flows.
2222  *
2223  * @param priv
2224  *   Pointer to private structure.
2225  * @param list
2226  *   Pointer to a TAILQ flow list.
2227  *
2228  * @return
2229  *   0 on success, an errno value otherwise and rte_errno is set.
2230  */
2231 int
2232 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2233 {
2234         struct rte_flow *flow;
2235
2236         TAILQ_FOREACH(flow, list, next) {
2237                 unsigned int i;
2238
2239                 if (flow->drop) {
2240                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2241                                 mlx5_glue->create_flow
2242                                 (priv->flow_drop_queue->qp,
2243                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2244                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2245                                 DEBUG("Flow %p cannot be applied",
2246                                       (void *)flow);
2247                                 rte_errno = EINVAL;
2248                                 return rte_errno;
2249                         }
2250                         DEBUG("Flow %p applied", (void *)flow);
2251                         /* Next flow. */
2252                         continue;
2253                 }
2254                 for (i = 0; i != hash_rxq_init_n; ++i) {
2255                         if (!flow->frxq[i].ibv_attr)
2256                                 continue;
2257                         flow->frxq[i].hrxq =
2258                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2259                                                    flow->rss_conf.rss_key_len,
2260                                                    hash_rxq_init[i].hash_fields,
2261                                                    (*flow->queues),
2262                                                    flow->queues_n);
2263                         if (flow->frxq[i].hrxq)
2264                                 goto flow_create;
2265                         flow->frxq[i].hrxq =
2266                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2267                                                    flow->rss_conf.rss_key_len,
2268                                                    hash_rxq_init[i].hash_fields,
2269                                                    (*flow->queues),
2270                                                    flow->queues_n);
2271                         if (!flow->frxq[i].hrxq) {
2272                                 DEBUG("Flow %p cannot be applied",
2273                                       (void *)flow);
2274                                 rte_errno = EINVAL;
2275                                 return rte_errno;
2276                         }
2277 flow_create:
2278                         flow->frxq[i].ibv_flow =
2279                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2280                                                        flow->frxq[i].ibv_attr);
2281                         if (!flow->frxq[i].ibv_flow) {
2282                                 DEBUG("Flow %p cannot be applied",
2283                                       (void *)flow);
2284                                 rte_errno = EINVAL;
2285                                 return rte_errno;
2286                         }
2287                         DEBUG("Flow %p applied", (void *)flow);
2288                 }
2289                 if (!flow->mark)
2290                         continue;
2291                 for (i = 0; i != flow->queues_n; ++i)
2292                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2293         }
2294         return 0;
2295 }
2296
2297 /**
2298  * Verify the flow list is empty.
2299  *
2300  * @param priv
2301  *   Pointer to private structure.
2302  *
2303  * @return The number of flows not released.
2304  */
2305 int
2306 priv_flow_verify(struct priv *priv)
2307 {
2308         struct rte_flow *flow;
2309         int ret = 0;
2310
2311         TAILQ_FOREACH(flow, &priv->flows, next) {
2312                 DEBUG("%p: flow %p still referenced", (void *)priv,
2313                       (void *)flow);
2314                 ++ret;
2315         }
2316         return ret;
2317 }
2318
2319 /**
2320  * Enable a control flow configured from the control plane.
2321  *
2322  * @param dev
2323  *   Pointer to Ethernet device.
2324  * @param eth_spec
2325  *   An Ethernet flow spec to apply.
2326  * @param eth_mask
2327  *   An Ethernet flow mask to apply.
2328  * @param vlan_spec
2329  *   A VLAN flow spec to apply.
2330  * @param vlan_mask
2331  *   A VLAN flow mask to apply.
2332  *
2333  * @return
2334  *   0 on success, an errno value on failure.
2335  */
2336 int
2337 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2338                     struct rte_flow_item_eth *eth_spec,
2339                     struct rte_flow_item_eth *eth_mask,
2340                     struct rte_flow_item_vlan *vlan_spec,
2341                     struct rte_flow_item_vlan *vlan_mask)
2342 {
2343         struct priv *priv = dev->data->dev_private;
2344         const struct rte_flow_attr attr = {
2345                 .ingress = 1,
2346                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2347         };
2348         struct rte_flow_item items[] = {
2349                 {
2350                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2351                         .spec = eth_spec,
2352                         .last = NULL,
2353                         .mask = eth_mask,
2354                 },
2355                 {
2356                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2357                                 RTE_FLOW_ITEM_TYPE_END,
2358                         .spec = vlan_spec,
2359                         .last = NULL,
2360                         .mask = vlan_mask,
2361                 },
2362                 {
2363                         .type = RTE_FLOW_ITEM_TYPE_END,
2364                 },
2365         };
2366         struct rte_flow_action actions[] = {
2367                 {
2368                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2369                 },
2370                 {
2371                         .type = RTE_FLOW_ACTION_TYPE_END,
2372                 },
2373         };
2374         struct rte_flow *flow;
2375         struct rte_flow_error error;
2376         unsigned int i;
2377         union {
2378                 struct rte_flow_action_rss rss;
2379                 struct {
2380                         const struct rte_eth_rss_conf *rss_conf;
2381                         uint16_t num;
2382                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2383                 } local;
2384         } action_rss;
2385
2386         if (!priv->reta_idx_n)
2387                 return EINVAL;
2388         for (i = 0; i != priv->reta_idx_n; ++i)
2389                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2390         action_rss.local.rss_conf = &priv->rss_conf;
2391         action_rss.local.num = priv->reta_idx_n;
2392         actions[0].conf = (const void *)&action_rss.rss;
2393         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2394                                 &error);
2395         if (!flow)
2396                 return rte_errno;
2397         return 0;
2398 }
2399
2400 /**
2401  * Enable a control flow configured from the control plane.
2402  *
2403  * @param dev
2404  *   Pointer to Ethernet device.
2405  * @param eth_spec
2406  *   An Ethernet flow spec to apply.
2407  * @param eth_mask
2408  *   An Ethernet flow mask to apply.
2409  *
2410  * @return
2411  *   0 on success, an errno value on failure.
2412  */
2413 int
2414 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2415                struct rte_flow_item_eth *eth_spec,
2416                struct rte_flow_item_eth *eth_mask)
2417 {
2418         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2419 }
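
/*
 * Usage sketch (editorial example): internal callers (e.g. the traffic enable
 * path) are expected to build an Ethernet spec/mask pair and hand it to
 * mlx5_ctrl_flow(); the sketch below would install a rule for a single
 * unicast MAC address. The function name is illustrative only.
 */
static int __rte_unused
example_ctrl_flow_unicast(struct rte_eth_dev *dev,
                          const struct ether_addr *mac)
{
        struct rte_flow_item_eth spec = {
                .type = 0,
        };
        struct rte_flow_item_eth mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                .type = 0,
        };

        memcpy(&spec.dst, mac, sizeof(spec.dst));
        return mlx5_ctrl_flow(dev, &spec, &mask);
}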
2420
2421 /**
2422  * Destroy a flow.
2423  *
2424  * @see rte_flow_destroy()
2425  * @see rte_flow_ops
2426  */
2427 int
2428 mlx5_flow_destroy(struct rte_eth_dev *dev,
2429                   struct rte_flow *flow,
2430                   struct rte_flow_error *error __rte_unused)
2431 {
2432         struct priv *priv = dev->data->dev_private;
2433
2434         priv_lock(priv);
2435         priv_flow_destroy(priv, &priv->flows, flow);
2436         priv_unlock(priv);
2437         return 0;
2438 }
2439
2440 /**
2441  * Destroy all flows.
2442  *
2443  * @see rte_flow_flush()
2444  * @see rte_flow_ops
2445  */
2446 int
2447 mlx5_flow_flush(struct rte_eth_dev *dev,
2448                 struct rte_flow_error *error __rte_unused)
2449 {
2450         struct priv *priv = dev->data->dev_private;
2451
2452         priv_lock(priv);
2453         priv_flow_flush(priv, &priv->flows);
2454         priv_unlock(priv);
2455         return 0;
2456 }
2457
2458 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2459 /**
2460  * Query flow counter.
2461  *
2462  * @param cs
2463  *   The counter set.
2464  * @param query_count
2465  *   Returned data from the counter.
2466  *
2467  * @return
2468  *   0 on success, an errno value otherwise and rte_errno is set.
2469  */
2470 static int
2471 priv_flow_query_count(struct ibv_counter_set *cs,
2472                       struct mlx5_flow_counter_stats *counter_stats,
2473                       struct rte_flow_query_count *query_count,
2474                       struct rte_flow_error *error)
2475 {
2476         uint64_t counters[2];
2477         struct ibv_query_counter_set_attr query_cs_attr = {
2478                 .cs = cs,
2479                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2480         };
2481         struct ibv_counter_set_data query_out = {
2482                 .out = counters,
2483                 .outlen = 2 * sizeof(uint64_t),
2484         };
2485         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2486
2487         if (res) {
2488                 rte_flow_error_set(error, -res,
2489                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2490                                    NULL,
2491                                    "cannot read counter");
2492                 return -res;
2493         }
2494         query_count->hits_set = 1;
2495         query_count->bytes_set = 1;
2496         query_count->hits = counters[0] - counter_stats->hits;
2497         query_count->bytes = counters[1] - counter_stats->bytes;
2498         if (query_count->reset) {
2499                 counter_stats->hits = counters[0];
2500                 counter_stats->bytes = counters[1];
2501         }
2502         return 0;
2503 }
2504
2505 /**
2506  * Query a flow.
2507  *
2508  * @see rte_flow_query()
2509  * @see rte_flow_ops
2510  */
2511 int
2512 mlx5_flow_query(struct rte_eth_dev *dev,
2513                 struct rte_flow *flow,
2514                 enum rte_flow_action_type action __rte_unused,
2515                 void *data,
2516                 struct rte_flow_error *error)
2517 {
2518         struct priv *priv = dev->data->dev_private;
2519         int res = EINVAL;
2520
2521         priv_lock(priv);
2522         if (flow->cs) {
2523                 res = priv_flow_query_count(flow->cs,
2524                                         &flow->counter_stats,
2525                                         (struct rte_flow_query_count *)data,
2526                                         error);
2527         } else {
2528                 rte_flow_error_set(error, res,
2529                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2530                                    NULL,
2531                                    "no counter found for flow");
2532         }
2533         priv_unlock(priv);
2534         return -res;
2535 }
2536 #endif
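
/*
 * Usage sketch (editorial example): reading a counter attached to a flow
 * through the generic API; only meaningful when the flow was created with a
 * COUNT action and counter support is compiled in. The function name and
 * port id are illustrative.
 */
static int __rte_unused
example_flow_query_count(uint16_t port_id, struct rte_flow *flow,
                         uint64_t *hits, uint64_t *bytes)
{
        struct rte_flow_query_count count = { .reset = 1 };
        struct rte_flow_error error;

        if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
                           &count, &error))
                return -1;
        *hits = count.hits_set ? count.hits : 0;
        *bytes = count.bytes_set ? count.bytes : 0;
        return 0;
}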
2537
2538 /**
2539  * Isolated mode.
2540  *
2541  * @see rte_flow_isolate()
2542  * @see rte_flow_ops
2543  */
2544 int
2545 mlx5_flow_isolate(struct rte_eth_dev *dev,
2546                   int enable,
2547                   struct rte_flow_error *error)
2548 {
2549         struct priv *priv = dev->data->dev_private;
2550
2551         priv_lock(priv);
2552         if (dev->data->dev_started) {
2553                 rte_flow_error_set(error, EBUSY,
2554                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2555                                    NULL,
2556                                    "port must be stopped first");
2557                 priv_unlock(priv);
2558                 return -rte_errno;
2559         }
2560         priv->isolated = !!enable;
2561         if (enable)
2562                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2563         else
2564                 priv->dev->dev_ops = &mlx5_dev_ops;
2565         priv_unlock(priv);
2566         return 0;
2567 }
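
/*
 * Usage sketch (editorial example): isolated mode must be requested while the
 * port is stopped, as enforced above, so an application enables it before
 * starting the port. The function name is illustrative only.
 */
static int __rte_unused
example_enable_isolate(uint16_t port_id)
{
        struct rte_flow_error error;

        if (rte_flow_isolate(port_id, 1, &error))
                return -1;
        return rte_eth_dev_start(port_id);
}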
2568
2569 /**
2570  * Convert a flow director filter to a generic flow.
2571  *
2572  * @param priv
2573  *   Private structure.
2574  * @param fdir_filter
2575  *   Flow director filter to add.
2576  * @param attributes
2577  *   Generic flow parameters structure.
2578  *
2579  * @return
2580  *   0 on success, errno value on error.
2581  */
2582 static int
2583 priv_fdir_filter_convert(struct priv *priv,
2584                          const struct rte_eth_fdir_filter *fdir_filter,
2585                          struct mlx5_fdir *attributes)
2586 {
2587         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2588
2589         /* Validate queue number. */
2590         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2591                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2592                 return EINVAL;
2593         }
2594         attributes->attr.ingress = 1;
2595         attributes->items[0] = (struct rte_flow_item) {
2596                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2597                 .spec = &attributes->l2,
2598                 .mask = &attributes->l2_mask,
2599         };
2600         switch (fdir_filter->action.behavior) {
2601         case RTE_ETH_FDIR_ACCEPT:
2602                 attributes->actions[0] = (struct rte_flow_action){
2603                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2604                         .conf = &attributes->queue,
2605                 };
2606                 break;
2607         case RTE_ETH_FDIR_REJECT:
2608                 attributes->actions[0] = (struct rte_flow_action){
2609                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2610                 };
2611                 break;
2612         default:
2613                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2614                 return ENOTSUP;
2615         }
2616         attributes->queue.index = fdir_filter->action.rx_queue;
2617         switch (fdir_filter->input.flow_type) {
2618         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2619                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2620                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2621                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2622                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2623                         .type_of_service = input->flow.udp4_flow.ip.tos,
2624                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2625                 };
2626                 attributes->l4.udp.hdr = (struct udp_hdr){
2627                         .src_port = input->flow.udp4_flow.src_port,
2628                         .dst_port = input->flow.udp4_flow.dst_port,
2629                 };
2630                 attributes->items[1] = (struct rte_flow_item){
2631                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2632                         .spec = &attributes->l3,
2633                         .mask = &attributes->l3,
2634                 };
2635                 attributes->items[2] = (struct rte_flow_item){
2636                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2637                         .spec = &attributes->l4,
2638                         .mask = &attributes->l4,
2639                 };
2640                 break;
2641         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2642                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2643                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2644                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2645                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2646                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2647                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2648                 };
2649                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2650                         .src_port = input->flow.tcp4_flow.src_port,
2651                         .dst_port = input->flow.tcp4_flow.dst_port,
2652                 };
2653                 attributes->items[1] = (struct rte_flow_item){
2654                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2655                         .spec = &attributes->l3,
2656                         .mask = &attributes->l3,
2657                 };
2658                 attributes->items[2] = (struct rte_flow_item){
2659                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2660                         .spec = &attributes->l4,
2661                         .mask = &attributes->l4,
2662                 };
2663                 break;
2664         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2665                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2666                         .src_addr = input->flow.ip4_flow.src_ip,
2667                         .dst_addr = input->flow.ip4_flow.dst_ip,
2668                         .time_to_live = input->flow.ip4_flow.ttl,
2669                         .type_of_service = input->flow.ip4_flow.tos,
2670                         .next_proto_id = input->flow.ip4_flow.proto,
2671                 };
2672                 attributes->items[1] = (struct rte_flow_item){
2673                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2674                         .spec = &attributes->l3,
2675                         .mask = &attributes->l3,
2676                 };
2677                 break;
2678         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2679                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2680                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2681                         .proto = input->flow.udp6_flow.ip.proto,
2682                 };
2683                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2684                        input->flow.udp6_flow.ip.src_ip,
2685                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2686                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2687                        input->flow.udp6_flow.ip.dst_ip,
2688                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2689                 attributes->l4.udp.hdr = (struct udp_hdr){
2690                         .src_port = input->flow.udp6_flow.src_port,
2691                         .dst_port = input->flow.udp6_flow.dst_port,
2692                 };
2693                 attributes->items[1] = (struct rte_flow_item){
2694                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2695                         .spec = &attributes->l3,
2696                         .mask = &attributes->l3,
2697                 };
2698                 attributes->items[2] = (struct rte_flow_item){
2699                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2700                         .spec = &attributes->l4,
2701                         .mask = &attributes->l4,
2702                 };
2703                 break;
2704         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2705                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2706                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2707                         .proto = input->flow.tcp6_flow.ip.proto,
2708                 };
2709                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2710                        input->flow.tcp6_flow.ip.src_ip,
2711                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2712                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2713                        input->flow.tcp6_flow.ip.dst_ip,
2714                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2715                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2716                         .src_port = input->flow.tcp6_flow.src_port,
2717                         .dst_port = input->flow.tcp6_flow.dst_port,
2718                 };
2719                 attributes->items[1] = (struct rte_flow_item){
2720                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2721                         .spec = &attributes->l3,
2722                         .mask = &attributes->l3,
2723                 };
2724                 attributes->items[2] = (struct rte_flow_item){
2725                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2726                         .spec = &attributes->l4,
2727                         .mask = &attributes->l4,
2728                 };
2729                 break;
2730         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2731                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2732                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2733                         .proto = input->flow.ipv6_flow.proto,
2734                 };
2735                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2736                        input->flow.ipv6_flow.src_ip,
2737                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2738                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2739                        input->flow.ipv6_flow.dst_ip,
2740                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2741                 attributes->items[1] = (struct rte_flow_item){
2742                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2743                         .spec = &attributes->l3,
2744                         .mask = &attributes->l3,
2745                 };
2746                 break;
2747         default:
2748         ERROR("invalid flow type %d",
2749                       fdir_filter->input.flow_type);
2750                 return ENOTSUP;
2751         }
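        /*
         * Whichever flow type matched above, the legacy flow director input
         * has now been copied into attributes->l3/->l4 and items[1] (plus
         * items[2] for UDP/TCP flows) point at them, with .spec and .mask
         * intentionally sharing the same storage.
         */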
2752         return 0;
2753 }
2754
2755 /**
2756  * Add a new flow director filter and store it in the list.
2757  *
2758  * @param priv
2759  *   Private structure.
2760  * @param fdir_filter
2761  *   Flow director filter to add.
2762  *
2763  * @return
2764  *   0 on success, errno value on failure.
2765  */
2766 static int
2767 priv_fdir_filter_add(struct priv *priv,
2768                      const struct rte_eth_fdir_filter *fdir_filter)
2769 {
2770         struct mlx5_fdir attributes = {
2771                 .attr.group = 0,
2772                 .l2_mask = {
2773                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2774                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2775                         .type = 0,
2776                 },
2777         };
2778         struct mlx5_flow_parse parser = {
2779                 .layer = HASH_RXQ_ETH,
2780         };
2781         struct rte_flow_error error;
2782         struct rte_flow *flow;
2783         int ret;
2784
2785         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2786         if (ret)
2787                 return -ret;
2788         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2789                                 attributes.actions, &error, &parser);
2790         if (ret)
2791                 return -ret;
2792         flow = priv_flow_create(priv,
2793                                 &priv->flows,
2794                                 &attributes.attr,
2795                                 attributes.items,
2796                                 attributes.actions,
2797                                 &error);
2798         if (flow) {
2799                 DEBUG("FDIR created %p", (void *)flow);
2800                 return 0;
2801         }
2802         return ENOTSUP;
2803 }
2804
2805 /**
2806  * Delete a specific filter.
2807  *
2808  * @param priv
2809  *   Private structure.
2810  * @param fdir_filter
2811  *   Filter to be deleted.
2812  *
2813  * @return
2814  *   0 on success, errno value on failure.
2815  */
2816 static int
2817 priv_fdir_filter_delete(struct priv *priv,
2818                         const struct rte_eth_fdir_filter *fdir_filter)
2819 {
2820         struct mlx5_fdir attributes = {
2821                 .attr.group = 0,
2822         };
2823         struct mlx5_flow_parse parser = {
2824                 .create = 1,
2825                 .layer = HASH_RXQ_ETH,
2826         };
2827         struct rte_flow_error error;
2828         struct rte_flow *flow;
2829         unsigned int i;
2830         int ret;
2831
2832         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2833         if (ret)
2834                 return -ret;
2835         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2836                                 attributes.actions, &error, &parser);
2837         if (ret)
2838                 goto exit;
2839         /*
2840          * Special case for the drop action, which is only appended to the
2841          * specifications when a flow is actually created.  The parsed
2842          * specifications therefore lack it and must get it added manually.
2843          */
2844         if (parser.drop) {
2845                 struct ibv_flow_spec_action_drop *drop;
2846
2847                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2848                                 parser.queue[HASH_RXQ_ETH].offset);
2849                 *drop = (struct ibv_flow_spec_action_drop){
2850                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2851                         .size = sizeof(struct ibv_flow_spec_action_drop),
2852                 };
2853                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2854         }
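        /*
         * Look for an existing flow carrying the same verbs specification:
         * first compare the ibv_flow_attr structures, then walk both spec
         * arrays one ibv_spec_header at a time, advancing by each header's
         * reported size.
         */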
2855         TAILQ_FOREACH(flow, &priv->flows, next) {
2856                 struct ibv_flow_attr *attr;
2857                 struct ibv_spec_header *attr_h;
2858                 void *spec;
2859                 struct ibv_flow_attr *flow_attr;
2860                 struct ibv_spec_header *flow_h;
2861                 void *flow_spec;
2862                 unsigned int specs_n;
2863
2864                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2865                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2866                 /* Compare the attributes first. */
2867                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2868                         continue;
2869                 if (attr->num_of_specs == 0)
2870                         continue;
2871                 spec = (void *)((uintptr_t)attr +
2872                                 sizeof(struct ibv_flow_attr));
2873                 flow_spec = (void *)((uintptr_t)flow_attr +
2874                                      sizeof(struct ibv_flow_attr));
2875                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2876                 for (i = 0; i != specs_n; ++i) {
2877                         attr_h = spec;
2878                         flow_h = flow_spec;
2879                         if (memcmp(spec, flow_spec,
2880                                    RTE_MIN(attr_h->size, flow_h->size)))
2881                                 goto wrong_flow;
2882                         spec = (void *)((uintptr_t)spec + attr_h->size);
2883                         flow_spec = (void *)((uintptr_t)flow_spec +
2884                                              flow_h->size);
2885                 }
2886                 /* At this point, the flow matches. */
2887                 break;
2888 wrong_flow:
2889                 /* The flow does not match. */
2890                 continue;
2891         }
2892         if (flow)
2893                 priv_flow_destroy(priv, &priv->flows, flow);
2894 exit:
2895         for (i = 0; i != hash_rxq_init_n; ++i) {
2896                 if (parser.queue[i].ibv_attr)
2897                         rte_free(parser.queue[i].ibv_attr);
2898         }
2899         return -ret;
2900 }
2901
2902 /**
2903  * Update a specific flow director filter (delete then re-add).
2904  *
2905  * @param priv
2906  *   Private structure.
2907  * @param fdir_filter
2908  *   Filter to be updated.
2909  *
2910  * @return
2911  *   0 on success, errno value on failure.
2912  */
2913 static int
2914 priv_fdir_filter_update(struct priv *priv,
2915                         const struct rte_eth_fdir_filter *fdir_filter)
2916 {
2917         int ret;
2918
2919         ret = priv_fdir_filter_delete(priv, fdir_filter);
2920         if (ret)
2921                 return ret;
2922         ret = priv_fdir_filter_add(priv, fdir_filter);
2923         return ret;
2924 }
2925
2926 /**
2927  * Flush all filters.
2928  *
2929  * @param priv
2930  *   Private structure.
2931  */
2932 static void
2933 priv_fdir_filter_flush(struct priv *priv)
2934 {
2935         priv_flow_flush(priv, &priv->flows);
2936 }
2937
2938 /**
2939  * Get flow director information.
2940  *
2941  * @param priv
2942  *   Private structure.
2943  * @param[out] fdir_info
2944  *   Resulting flow director information.
2945  */
2946 static void
2947 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2948 {
2949         struct rte_eth_fdir_masks *mask =
2950                 &priv->dev->data->dev_conf.fdir_conf.mask;
2951
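        /*
         * Only the flow director mode and the configured masks are reported;
         * flexible payload support and guaranteed filter space are not
         * advertised, so those fields are reported as zero.
         */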
2952         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2953         fdir_info->guarant_spc = 0;
2954         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2955         fdir_info->max_flexpayload = 0;
2956         fdir_info->flow_types_mask[0] = 0;
2957         fdir_info->flex_payload_unit = 0;
2958         fdir_info->max_flex_payload_segment_num = 0;
2959         fdir_info->flex_payload_limit = 0;
2960         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2961 }
2962
2963 /**
2964  * Deal with flow director operations.
2965  *
2966  * @param priv
2967  *   Pointer to private structure.
2968  * @param filter_op
2969  *   Operation to perform.
2970  * @param arg
2971  *   Pointer to operation-specific structure.
2972  *
2973  * @return
2974  *   0 on success, errno value on failure.
2975  */
2976 static int
2977 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2978 {
2979         enum rte_fdir_mode fdir_mode =
2980                 priv->dev->data->dev_conf.fdir_conf.mode;
2981         int ret = 0;
2982
2983         if (filter_op == RTE_ETH_FILTER_NOP)
2984                 return 0;
2985         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2986             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2987                 ERROR("%p: flow director mode %d not supported",
2988                       (void *)priv, fdir_mode);
2989                 return EINVAL;
2990         }
2991         switch (filter_op) {
2992         case RTE_ETH_FILTER_ADD:
2993                 ret = priv_fdir_filter_add(priv, arg);
2994                 break;
2995         case RTE_ETH_FILTER_UPDATE:
2996                 ret = priv_fdir_filter_update(priv, arg);
2997                 break;
2998         case RTE_ETH_FILTER_DELETE:
2999                 ret = priv_fdir_filter_delete(priv, arg);
3000                 break;
3001         case RTE_ETH_FILTER_FLUSH:
3002                 priv_fdir_filter_flush(priv);
3003                 break;
3004         case RTE_ETH_FILTER_INFO:
3005                 priv_fdir_info_get(priv, arg);
3006                 break;
3007         default:
3008                 DEBUG("%p: unknown operation %u", (void *)priv,
3009                       filter_op);
3010                 ret = EINVAL;
3011                 break;
3012         }
3013         return ret;
3014 }
3015
3016 /**
3017  * Manage filter operations.
3018  *
3019  * @param dev
3020  *   Pointer to Ethernet device structure.
3021  * @param filter_type
3022  *   Filter type.
3023  * @param filter_op
3024  *   Operation to perform.
3025  * @param arg
3026  *   Pointer to operation-specific structure.
3027  *
3028  * @return
3029  *   0 on success, negative errno value on failure.
3030  */
3031 int
3032 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3033                      enum rte_filter_type filter_type,
3034                      enum rte_filter_op filter_op,
3035                      void *arg)
3036 {
3037         int ret = EINVAL;
3038         struct priv *priv = dev->data->dev_private;
3039
3040         switch (filter_type) {
3041         case RTE_ETH_FILTER_GENERIC:
3042                 if (filter_op != RTE_ETH_FILTER_GET)
3043                         return -EINVAL;
3044                 *(const void **)arg = &mlx5_flow_ops;
3045                 return 0;
3046         case RTE_ETH_FILTER_FDIR:
3047                 priv_lock(priv);
3048                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3049                 priv_unlock(priv);
3050                 break;
3051         default:
3052                 ERROR("%p: filter type (%d) not supported",
3053                       (void *)dev, filter_type);
3054                 break;
3055         }
3056         return -ret;
3057 }
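
/*
 * Illustrative usage sketch (not part of this file): applications normally
 * reach this callback through the generic ethdev filter API, for instance
 * to add a flow director rule steering IPv4/UDP traffic to a queue:
 *
 *     struct rte_eth_fdir_filter f = {
 *             .input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *             .input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *             .action.rx_queue = 3,
 *     };
 *     rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                             RTE_ETH_FILTER_ADD, &f);
 *
 * Field names follow the legacy rte_eth_fdir_filter layout; the values are
 * arbitrary examples.
 */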