net/mlx5: fix port stop by verify flows are still present
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
        int dummy;
};
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

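/*
 * Note: flow_priority above is relative. The effective Verbs priority of a
 * rule is computed later as attr->priority + hash_rxq_init[type].flow_priority
 * (see priv_flow_convert()), so more specific layers (L4, priority 0) always
 * rank before less specific ones (L3, then L2) for the same rte_flow priority.
 */
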
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid action for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                                .time_to_live = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};

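/*
 * Example of a pattern accepted by the graph above (outer and inner layers):
 *   ETH -> IPV4 -> UDP -> VXLAN -> ETH -> IPV4 -> TCP
 * Each entry lists in .items the item types allowed to follow it, and END is
 * implicitly appended by the ITEMS() macro.
 */
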
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

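/*
 * This header mirrors the leading fields (type then size) common to every
 * ibv_flow_spec_* structure, which makes it possible to walk the list of
 * specifications appended after an ibv_flow_attr without knowing their exact
 * type.
 */
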
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

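/*
 * For instance, with a supported mask byte of 0xf0, a spec byte of 0x0f is
 * rejected above because (0x0f | 0xf0) != 0xf0: the request would match on
 * bits the device cannot filter on.
 */
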
/**
 * Copy the RSS configuration from the user configuration.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        const struct rte_eth_rss_conf *rss;

        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                rss = rss_conf;
        } else {
                rss = &priv->rss_conf;
        }
        if (rss->rss_key_len > 40)
                return EINVAL;
        parser->rss_conf.rss_key_len = rss->rss_key_len;
        parser->rss_conf.rss_hf = rss->rss_hf;
        memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
        parser->rss_conf.rss_key = parser->rss_key;
        return 0;
}

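/*
 * The 40-byte limit above matches the rss_key[] arrays of struct rte_flow and
 * struct mlx5_flow_parse, i.e. the size of a standard Toeplitz hash key.
 */
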
/**
 * Extract attribute to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract actions request to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add the default RSS configuration necessary for Verbs to create a
         * QP even when no RSS is requested.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

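/*
 * Typical action lists handled above: a single QUEUE (possibly combined with
 * MARK or FLAG and, when supported, COUNT), an RSS action spreading traffic
 * over several queues, or a DROP action, in which case any MARK is ignored.
 */
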
/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop || parser->queues_n == 1) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr*
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * This layer becomes useless as the pattern defines more
                 * specific layers.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                             (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS is possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop || parser->queues_n == 1) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
        if (parser->queues_n > 1) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                /*
                 * A single-queue action has its priority overridden by the
                 * Ethernet layer priority; adjust it to the priority of its
                 * most specific layer.
                 */
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
        if (parser->allmulti &&
            parser->layer == HASH_RXQ_ETH) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (parser->queue[i].ibv_attr->num_of_specs != 1)
                                break;
                        parser->queue[i].ibv_attr->type =
                                                IBV_FLOW_ATTR_MC_DEFAULT;
                }
        }
exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter.");
        return rte_errno;
}

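/*
 * In short, the conversion above proceeds in four stages: validate the
 * attributes, actions and items; allocate one ibv_flow_attr per eligible hash
 * Rx queue type (or a single one for drop/single-queue flows); convert each
 * item into its Verbs specification; and finally complete the missing layers
 * so every remaining attribute describes a full L2/L3/L4 stack.
 */
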
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                        parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
        parser->allmulti = eth.val.dst_mac[0] & 1;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

        if (spec) {
                unsigned int i;
                if (!mask)
                        mask = default_mask;

                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;

                        eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset - eth_size);
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
                }
        }
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
        struct ibv_flow_spec_ipv4_ext ipv4 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_IPV4;
        if (spec) {
                if (!mask)
                        mask = default_mask;
                ipv4.val = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = spec->hdr.src_addr,
                        .dst_ip = spec->hdr.dst_addr,
                        .proto = spec->hdr.next_proto_id,
                        .tos = spec->hdr.type_of_service,
                };
                ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = mask->hdr.src_addr,
                        .dst_ip = mask->hdr.dst_addr,
                        .proto = mask->hdr.next_proto_id,
                        .tos = mask->hdr.type_of_service,
                };
                /* Remove unwanted bits from values. */
                ipv4.val.src_ip &= ipv4.mask.src_ip;
                ipv4.val.dst_ip &= ipv4.mask.dst_ip;
                ipv4.val.proto &= ipv4.mask.proto;
                ipv4.val.tos &= ipv4.mask.tos;
        }
        mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        struct ibv_flow_spec_ipv6 ipv6 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV6,
                .size = ipv6_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_IPV6;
        if (spec) {
                unsigned int i;
                uint32_t vtc_flow_val;
                uint32_t vtc_flow_mask;

                if (!mask)
                        mask = default_mask;
                memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
                       RTE_DIM(ipv6.val.src_ip));
                memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
                       RTE_DIM(ipv6.val.dst_ip));
                memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
                       RTE_DIM(ipv6.mask.src_ip));
                memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
                       RTE_DIM(ipv6.mask.dst_ip));
                vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
                vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
                ipv6.val.flow_label =
1391                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1392                                          IPV6_HDR_FL_SHIFT);
1393                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1394                                          IPV6_HDR_TC_SHIFT;
1395                 ipv6.val.next_hdr = spec->hdr.proto;
1396                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1397                 ipv6.mask.flow_label =
1398                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1399                                          IPV6_HDR_FL_SHIFT);
1400                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1401                                           IPV6_HDR_TC_SHIFT;
1402                 ipv6.mask.next_hdr = mask->hdr.proto;
1403                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1404                 /* Remove unwanted bits from values. */
1405                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1406                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1407                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1408                 }
1409                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1410                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1411                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1412                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1413         }
1414         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1415         return 0;
1416 }
1417
1418 /**
1419  * Convert UDP item to Verbs specification.
1420  *
1421  * @param item[in]
1422  *   Item specification.
1423  * @param default_mask[in]
1424  *   Default bit-masks to use when item->mask is not provided.
1425  * @param data[in, out]
1426  *   User structure.
1427  */
1428 static int
1429 mlx5_flow_create_udp(const struct rte_flow_item *item,
1430                      const void *default_mask,
1431                      void *data)
1432 {
1433         const struct rte_flow_item_udp *spec = item->spec;
1434         const struct rte_flow_item_udp *mask = item->mask;
1435         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1436         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1437         struct ibv_flow_spec_tcp_udp udp = {
1438                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1439                 .size = udp_size,
1440         };
1441
1442         /* Don't update layer for the inner pattern. */
1443         if (!parser->inner) {
1444                 if (parser->layer == HASH_RXQ_IPV4)
1445                         parser->layer = HASH_RXQ_UDPV4;
1446                 else
1447                         parser->layer = HASH_RXQ_UDPV6;
1448         }
1449         if (spec) {
1450                 if (!mask)
1451                         mask = default_mask;
1452                 udp.val.dst_port = spec->hdr.dst_port;
1453                 udp.val.src_port = spec->hdr.src_port;
1454                 udp.mask.dst_port = mask->hdr.dst_port;
1455                 udp.mask.src_port = mask->hdr.src_port;
1456                 /* Remove unwanted bits from values. */
1457                 udp.val.src_port &= udp.mask.src_port;
1458                 udp.val.dst_port &= udp.mask.dst_port;
1459         }
1460         mlx5_flow_create_copy(parser, &udp, udp_size);
1461         return 0;
1462 }
1463
1464 /**
1465  * Convert TCP item to Verbs specification.
1466  *
1467  * @param item[in]
1468  *   Item specification.
1469  * @param default_mask[in]
1470  *   Default bit-masks to use when item->mask is not provided.
1471  * @param data[in, out]
1472  *   User structure.
1473  */
1474 static int
1475 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1476                      const void *default_mask,
1477                      void *data)
1478 {
1479         const struct rte_flow_item_tcp *spec = item->spec;
1480         const struct rte_flow_item_tcp *mask = item->mask;
1481         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1482         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1483         struct ibv_flow_spec_tcp_udp tcp = {
1484                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1485                 .size = tcp_size,
1486         };
1487
1488         /* Don't update layer for the inner pattern. */
1489         if (!parser->inner) {
1490                 if (parser->layer == HASH_RXQ_IPV4)
1491                         parser->layer = HASH_RXQ_TCPV4;
1492                 else
1493                         parser->layer = HASH_RXQ_TCPV6;
1494         }
1495         if (spec) {
1496                 if (!mask)
1497                         mask = default_mask;
1498                 tcp.val.dst_port = spec->hdr.dst_port;
1499                 tcp.val.src_port = spec->hdr.src_port;
1500                 tcp.mask.dst_port = mask->hdr.dst_port;
1501                 tcp.mask.src_port = mask->hdr.src_port;
1502                 /* Remove unwanted bits from values. */
1503                 tcp.val.src_port &= tcp.mask.src_port;
1504                 tcp.val.dst_port &= tcp.mask.dst_port;
1505         }
1506         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1507         return 0;
1508 }
1509
1510 /**
1511  * Convert VXLAN item to Verbs specification.
1512  *
1513  * @param item[in]
1514  *   Item specification.
1515  * @param default_mask[in]
1516  *   Default bit-masks to use when item->mask is not provided.
1517  * @param data[in, out]
1518  *   User structure.
1519  */
1520 static int
1521 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1522                        const void *default_mask,
1523                        void *data)
1524 {
1525         const struct rte_flow_item_vxlan *spec = item->spec;
1526         const struct rte_flow_item_vxlan *mask = item->mask;
1527         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1528         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1529         struct ibv_flow_spec_tunnel vxlan = {
1530                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1531                 .size = size,
1532         };
1533         union vni {
1534                 uint32_t vlan_id;
1535                 uint8_t vni[4];
1536         } id;
1537
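        /*
         * The item carries a 24-bit VNI: it is copied into bytes 1-3 of the
         * union (byte 0 cleared) so the resulting 32-bit value can be used
         * directly as the Verbs tunnel id.
         */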
1538         id.vni[0] = 0;
1539         parser->inner = IBV_FLOW_SPEC_INNER;
1540         if (spec) {
1541                 if (!mask)
1542                         mask = default_mask;
1543                 memcpy(&id.vni[1], spec->vni, 3);
1544                 vxlan.val.tunnel_id = id.vlan_id;
1545                 memcpy(&id.vni[1], mask->vni, 3);
1546                 vxlan.mask.tunnel_id = id.vlan_id;
1547                 /* Remove unwanted bits from values. */
1548                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1549         }
1550         /*
1551          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if it is
1552          * the only layer in the Verbs specification, it is interpreted as a
1553          * wildcard and all packets match the rule; if it follows a full
1554          * stack (e.g. eth / ipv4 / udp), all packets matching the preceding
1555          * layers also match the rule.
1556          * To avoid such a situation, VNI 0 is currently refused.
1557          */
1558         if (!vxlan.val.tunnel_id)
1559                 return EINVAL;
1560         mlx5_flow_create_copy(parser, &vxlan, size);
1561         return 0;
1562 }
1563
1564 /**
1565  * Convert mark/flag action to Verbs specification.
1566  *
1567  * @param parser
1568  *   Internal parser structure.
1569  * @param mark_id
1570  *   Mark identifier.
1571  */
1572 static int
1573 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1574 {
1575         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1576         struct ibv_flow_spec_action_tag tag = {
1577                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1578                 .size = size,
1579                 .tag_id = mlx5_flow_mark_set(mark_id),
1580         };
1581
1582         assert(parser->mark);
1583         mlx5_flow_create_copy(parser, &tag, size);
1584         return 0;
1585 }
1586
1587 /**
1588  * Convert count action to Verbs specification.
1589  *
1590  * @param priv
1591  *   Pointer to private structure.
1592  * @param parser
1593  *   Pointer to MLX5 flow parser structure.
1594  *
1595  * @return
1596  *   0 on success, errno value on failure.
1597  */
1598 static int
1599 mlx5_flow_create_count(struct priv *priv __rte_unused,
1600                        struct mlx5_flow_parse *parser __rte_unused)
1601 {
1602 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1603         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1604         struct ibv_counter_set_init_attr init_attr = {0};
1605         struct ibv_flow_spec_counter_action counter = {
1606                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1607                 .size = size,
1608                 .counter_set_handle = 0,
1609         };
1610
1611         init_attr.counter_set_id = 0;
1612         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1613         if (!parser->cs)
1614                 return EINVAL;
1615         counter.counter_set_handle = parser->cs->handle;
1616         mlx5_flow_create_copy(parser, &counter, size);
1617 #endif
1618         return 0;
1619 }
1620
1621 /**
1622  * Complete flow rule creation with a drop queue.
1623  *
1624  * @param priv
1625  *   Pointer to private structure.
1626  * @param parser
1627  *   Internal parser structure.
1628  * @param flow
1629  *   Pointer to the rte_flow.
1630  * @param[out] error
1631  *   Perform verbose error reporting if not NULL.
1632  *
1633  * @return
1634  *   0 on success, errno value on failure.
1635  */
1636 static int
1637 priv_flow_create_action_queue_drop(struct priv *priv,
1638                                    struct mlx5_flow_parse *parser,
1639                                    struct rte_flow *flow,
1640                                    struct rte_flow_error *error)
1641 {
1642         struct ibv_flow_spec_action_drop *drop;
1643         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1644         int err = 0;
1645
1646         assert(priv->pd);
1647         assert(priv->ctx);
1648         flow->drop = 1;
1649         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1650                         parser->queue[HASH_RXQ_ETH].offset);
1651         *drop = (struct ibv_flow_spec_action_drop){
1652                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1653                         .size = size,
1654         };
1655         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1656         parser->queue[HASH_RXQ_ETH].offset += size;
1657         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1658                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1659         if (parser->count)
1660                 flow->cs = parser->cs;
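        /*
         * When the port is not started yet, keep only the Verbs attribute;
         * the flow itself is instantiated later by priv_flow_start().
         */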
1661         if (!priv->dev->data->dev_started)
1662                 return 0;
1663         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1664         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1665                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1666                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1667         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1668                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1669                                    NULL, "flow rule creation failure");
1670                 err = ENOMEM;
1671                 goto error;
1672         }
1673         return 0;
1674 error:
1675         assert(flow);
1676         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1677                 claim_zero(mlx5_glue->destroy_flow
1678                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1679                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1680         }
1681         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1682                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1683                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1684         }
1685         if (flow->cs) {
1686                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1687                 flow->cs = NULL;
1688                 parser->cs = NULL;
1689         }
1690         return err;
1691 }
1692
1693 /**
1694  * Create hash Rx queues when RSS is enabled.
1695  *
1696  * @param priv
1697  *   Pointer to private structure.
1698  * @param parser
1699  *   Internal parser structure.
1700  * @param flow
1701  *   Pointer to the rte_flow.
1702  * @param[out] error
1703  *   Perform verbose error reporting if not NULL.
1704  *
1705  * @return
1706  *   0 on success, an errno value otherwise and rte_errno is set.
1707  */
1708 static int
1709 priv_flow_create_action_queue_rss(struct priv *priv,
1710                                   struct mlx5_flow_parse *parser,
1711                                   struct rte_flow *flow,
1712                                   struct rte_flow_error *error)
1713 {
1714         unsigned int i;
1715
1716         for (i = 0; i != hash_rxq_init_n; ++i) {
1717                 uint64_t hash_fields;
1718
1719                 if (!parser->queue[i].ibv_attr)
1720                         continue;
1721                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1722                 parser->queue[i].ibv_attr = NULL;
1723                 hash_fields = hash_rxq_init[i].hash_fields;
1724                 if (!priv->dev->data->dev_started)
1725                         continue;
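                /*
                 * Reuse an existing hash Rx queue matching the RSS key, hash
                 * fields and queue list when possible, otherwise create a new
                 * one.
                 */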
1726                 flow->frxq[i].hrxq =
1727                         mlx5_priv_hrxq_get(priv,
1728                                            parser->rss_conf.rss_key,
1729                                            parser->rss_conf.rss_key_len,
1730                                            hash_fields,
1731                                            parser->queues,
1732                                            parser->queues_n);
1733                 if (flow->frxq[i].hrxq)
1734                         continue;
1735                 flow->frxq[i].hrxq =
1736                         mlx5_priv_hrxq_new(priv,
1737                                            parser->rss_conf.rss_key,
1738                                            parser->rss_conf.rss_key_len,
1739                                            hash_fields,
1740                                            parser->queues,
1741                                            parser->queues_n);
1742                 if (!flow->frxq[i].hrxq) {
1743                         rte_flow_error_set(error, ENOMEM,
1744                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1745                                            NULL, "cannot create hash rxq");
1746                         return ENOMEM;
1747                 }
1748         }
1749         return 0;
1750 }
1751
1752 /**
1753  * Complete flow rule creation.
1754  *
1755  * @param priv
1756  *   Pointer to private structure.
1757  * @param parser
1758  *   Internal parser structure.
1759  * @param flow
1760  *   Pointer to the rte_flow.
1761  * @param[out] error
1762  *   Perform verbose error reporting if not NULL.
1763  *
1764  * @return
1765  *   0 on success, an errno value otherwise and rte_errno is set.
1766  */
1767 static int
1768 priv_flow_create_action_queue(struct priv *priv,
1769                               struct mlx5_flow_parse *parser,
1770                               struct rte_flow *flow,
1771                               struct rte_flow_error *error)
1772 {
1773         int err = 0;
1774         unsigned int i;
1775
1776         assert(priv->pd);
1777         assert(priv->ctx);
1778         assert(!parser->drop);
1779         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1780         if (err)
1781                 goto error;
1782         if (parser->count)
1783                 flow->cs = parser->cs;
1784         if (!priv->dev->data->dev_started)
1785                 return 0;
1786         for (i = 0; i != hash_rxq_init_n; ++i) {
1787                 if (!flow->frxq[i].hrxq)
1788                         continue;
1789                 flow->frxq[i].ibv_flow =
1790                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1791                                                flow->frxq[i].ibv_attr);
1792                 if (!flow->frxq[i].ibv_flow) {
1793                         rte_flow_error_set(error, ENOMEM,
1794                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1795                                            NULL, "flow rule creation failure");
1796                         err = ENOMEM;
1797                         goto error;
1798                 }
1799                 DEBUG("%p type %d QP %p ibv_flow %p",
1800                       (void *)flow, i,
1801                       (void *)flow->frxq[i].hrxq,
1802                       (void *)flow->frxq[i].ibv_flow);
1803         }
1804         for (i = 0; i != parser->queues_n; ++i) {
1805                 struct mlx5_rxq_data *q =
1806                         (*priv->rxqs)[parser->queues[i]];
1807
1808                 q->mark |= parser->mark;
1809         }
1810         return 0;
1811 error:
1812         assert(flow);
1813         for (i = 0; i != hash_rxq_init_n; ++i) {
1814                 if (flow->frxq[i].ibv_flow) {
1815                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1816
1817                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1818                 }
1819                 if (flow->frxq[i].hrxq)
1820                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1821                 if (flow->frxq[i].ibv_attr)
1822                         rte_free(flow->frxq[i].ibv_attr);
1823         }
1824         if (flow->cs) {
1825                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1826                 flow->cs = NULL;
1827                 parser->cs = NULL;
1828         }
1829         return err;
1830 }
1831
1832 /**
1833  * Convert a flow.
1834  *
1835  * @param priv
1836  *   Pointer to private structure.
1837  * @param list
1838  *   Pointer to a TAILQ flow list.
1839  * @param[in] attr
1840  *   Flow rule attributes.
1841  * @param[in] items
1842  *   Pattern specification (list terminated by the END pattern item).
1843  * @param[in] actions
1844  *   Associated actions (list terminated by the END action).
1845  * @param[out] error
1846  *   Perform verbose error reporting if not NULL.
1847  *
1848  * @return
1849  *   A flow on success, NULL otherwise.
1850  */
1851 static struct rte_flow *
1852 priv_flow_create(struct priv *priv,
1853                  struct mlx5_flows *list,
1854                  const struct rte_flow_attr *attr,
1855                  const struct rte_flow_item items[],
1856                  const struct rte_flow_action actions[],
1857                  struct rte_flow_error *error)
1858 {
1859         struct mlx5_flow_parse parser = { .create = 1, };
1860         struct rte_flow *flow = NULL;
1861         unsigned int i;
1862         int err;
1863
1864         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1865         if (err)
1866                 goto exit;
1867         flow = rte_calloc(__func__, 1,
1868                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1869                           0);
1870         if (!flow) {
1871                 rte_flow_error_set(error, ENOMEM,
1872                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1873                                    NULL,
1874                                    "cannot allocate flow memory");
1875                 return NULL;
1876         }
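        /*
         * The queue array lives in the same allocation, immediately after the
         * rte_flow structure (hence the flow + 1 arithmetic below).
         */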
1877         /* Copy queues configuration. */
1878         flow->queues = (uint16_t (*)[])(flow + 1);
1879         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1880         flow->queues_n = parser.queues_n;
1881         flow->mark = parser.mark;
1882         /* Copy RSS configuration. */
1883         flow->rss_conf = parser.rss_conf;
1884         flow->rss_conf.rss_key = flow->rss_key;
1885         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1886         /* Finalize the flow. */
1887         if (parser.drop)
1888                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1889                                                          error);
1890         else
1891                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1892         if (err)
1893                 goto exit;
1894         TAILQ_INSERT_TAIL(list, flow, next);
1895         DEBUG("Flow created %p", (void *)flow);
1896         return flow;
1897 exit:
1898         for (i = 0; i != hash_rxq_init_n; ++i) {
1899                 if (parser.queue[i].ibv_attr)
1900                         rte_free(parser.queue[i].ibv_attr);
1901         }
1902         rte_free(flow);
1903         return NULL;
1904 }
1905
1906 /**
1907  * Validate a flow supported by the NIC.
1908  *
1909  * @see rte_flow_validate()
1910  * @see rte_flow_ops
1911  */
1912 int
1913 mlx5_flow_validate(struct rte_eth_dev *dev,
1914                    const struct rte_flow_attr *attr,
1915                    const struct rte_flow_item items[],
1916                    const struct rte_flow_action actions[],
1917                    struct rte_flow_error *error)
1918 {
1919         struct priv *priv = dev->data->dev_private;
1920         int ret;
1921         struct mlx5_flow_parse parser = { .create = 0, };
1922
1923         priv_lock(priv);
1924         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1925         priv_unlock(priv);
1926         return ret;
1927 }
1928
1929 /**
1930  * Create a flow.
1931  *
1932  * @see rte_flow_create()
1933  * @see rte_flow_ops
1934  */
1935 struct rte_flow *
1936 mlx5_flow_create(struct rte_eth_dev *dev,
1937                  const struct rte_flow_attr *attr,
1938                  const struct rte_flow_item items[],
1939                  const struct rte_flow_action actions[],
1940                  struct rte_flow_error *error)
1941 {
1942         struct priv *priv = dev->data->dev_private;
1943         struct rte_flow *flow;
1944
1945         priv_lock(priv);
1946         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1947                                 error);
1948         priv_unlock(priv);
1949         return flow;
1950 }
1951
1952 /**
1953  * Destroy a flow.
1954  *
1955  * @param priv
1956  *   Pointer to private structure.
1957  * @param list
1958  *   Pointer to a TAILQ flow list.
1959  * @param[in] flow
1960  *   Flow to destroy.
1961  */
1962 static void
1963 priv_flow_destroy(struct priv *priv,
1964                   struct mlx5_flows *list,
1965                   struct rte_flow *flow)
1966 {
1967         unsigned int i;
1968
1969         if (flow->drop || !flow->mark)
1970                 goto free;
1971         for (i = 0; i != flow->queues_n; ++i) {
1972                 struct rte_flow *tmp;
1973                 int mark = 0;
1974
1975                 /*
1976                  * To remove the mark from the queue, the queue must not be
1977                  * present in any other marked flow (RSS or not).
1978                  */
1979                 TAILQ_FOREACH(tmp, list, next) {
1980                         unsigned int j;
1981                         uint16_t *tqs = NULL;
1982                         uint16_t tq_n = 0;
1983
1984                         if (!tmp->mark)
1985                                 continue;
1986                         for (j = 0; j != hash_rxq_init_n; ++j) {
1987                                 if (!tmp->frxq[j].hrxq)
1988                                         continue;
1989                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1990                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1991                         }
1992                         if (!tq_n)
1993                                 continue;
1994                         for (j = 0; (j != tq_n) && !mark; j++)
1995                                 if (tqs[j] == (*flow->queues)[i])
1996                                         mark = 1;
1997                 }
1998                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1999         }
2000 free:
2001         if (flow->drop) {
2002                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2003                         claim_zero(mlx5_glue->destroy_flow
2004                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2005                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2006         } else {
2007                 for (i = 0; i != hash_rxq_init_n; ++i) {
2008                         struct mlx5_flow *frxq = &flow->frxq[i];
2009
2010                         if (frxq->ibv_flow)
2011                                 claim_zero(mlx5_glue->destroy_flow
2012                                            (frxq->ibv_flow));
2013                         if (frxq->hrxq)
2014                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2015                         if (frxq->ibv_attr)
2016                                 rte_free(frxq->ibv_attr);
2017                 }
2018         }
2019         if (flow->cs) {
2020                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2021                 flow->cs = NULL;
2022         }
2023         TAILQ_REMOVE(list, flow, next);
2024         DEBUG("Flow destroyed %p", (void *)flow);
2025         rte_free(flow);
2026 }
2027
2028 /**
2029  * Destroy all flows.
2030  *
2031  * @param priv
2032  *   Pointer to private structure.
2033  * @param list
2034  *   Pointer to a TAILQ flow list.
2035  */
2036 void
2037 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2038 {
2039         while (!TAILQ_EMPTY(list)) {
2040                 struct rte_flow *flow;
2041
2042                 flow = TAILQ_FIRST(list);
2043                 priv_flow_destroy(priv, list, flow);
2044         }
2045 }
2046
2047 /**
2048  * Create drop queue.
2049  *
2050  * @param priv
2051  *   Pointer to private structure.
2052  *
2053  * @return
2054  *   0 on success, -1 on failure.
2055  */
2056 int
2057 priv_flow_create_drop_queue(struct priv *priv)
2058 {
2059         struct mlx5_hrxq_drop *fdq = NULL;
2060
2061         assert(priv->pd);
2062         assert(priv->ctx);
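        /*
         * The drop queue is a minimal set of Verbs objects: one CQ, one dummy
         * WQ, a single-entry indirection table and a hashed QP with no hash
         * fields, so that any packet steered to it is discarded.
         */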
2063         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2064         if (!fdq) {
2065                 WARN("cannot allocate memory for drop queue");
2066                 goto error;
2067         }
2068         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2069         if (!fdq->cq) {
2070                 WARN("cannot allocate CQ for drop queue");
2071                 goto error;
2072         }
2073         fdq->wq = mlx5_glue->create_wq
2074                 (priv->ctx,
2075                  &(struct ibv_wq_init_attr){
2076                         .wq_type = IBV_WQT_RQ,
2077                         .max_wr = 1,
2078                         .max_sge = 1,
2079                         .pd = priv->pd,
2080                         .cq = fdq->cq,
2081                  });
2082         if (!fdq->wq) {
2083                 WARN("cannot allocate WQ for drop queue");
2084                 goto error;
2085         }
2086         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2087                 (priv->ctx,
2088                  &(struct ibv_rwq_ind_table_init_attr){
2089                         .log_ind_tbl_size = 0,
2090                         .ind_tbl = &fdq->wq,
2091                         .comp_mask = 0,
2092                  });
2093         if (!fdq->ind_table) {
2094                 WARN("cannot allocate indirection table for drop queue");
2095                 goto error;
2096         }
2097         fdq->qp = mlx5_glue->create_qp_ex
2098                 (priv->ctx,
2099                  &(struct ibv_qp_init_attr_ex){
2100                         .qp_type = IBV_QPT_RAW_PACKET,
2101                         .comp_mask =
2102                                 IBV_QP_INIT_ATTR_PD |
2103                                 IBV_QP_INIT_ATTR_IND_TABLE |
2104                                 IBV_QP_INIT_ATTR_RX_HASH,
2105                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2106                                 .rx_hash_function =
2107                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2108                                 .rx_hash_key_len = rss_hash_default_key_len,
2109                                 .rx_hash_key = rss_hash_default_key,
2110                                 .rx_hash_fields_mask = 0,
2111                                 },
2112                         .rwq_ind_tbl = fdq->ind_table,
2113                         .pd = priv->pd
2114                  });
2115         if (!fdq->qp) {
2116                 WARN("cannot allocate QP for drop queue");
2117                 goto error;
2118         }
2119         priv->flow_drop_queue = fdq;
2120         return 0;
2121 error:
2122         if (fdq && fdq->qp)
2123                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2124         if (fdq && fdq->ind_table)
2125                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2126         if (fdq && fdq->wq)
2127                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2128         if (fdq && fdq->cq)
2129                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2130         if (fdq)
2131                 rte_free(fdq);
2132         priv->flow_drop_queue = NULL;
2133         return -1;
2134 }
2135
2136 /**
2137  * Delete drop queue.
2138  *
2139  * @param priv
2140  *   Pointer to private structure.
2141  */
2142 void
2143 priv_flow_delete_drop_queue(struct priv *priv)
2144 {
2145         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2146
2147         if (!fdq)
2148                 return;
2149         if (fdq->qp)
2150                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2151         if (fdq->ind_table)
2152                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2153         if (fdq->wq)
2154                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2155         if (fdq->cq)
2156                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2157         rte_free(fdq);
2158         priv->flow_drop_queue = NULL;
2159 }
2160
2161 /**
2162  * Remove all flows.
2163  *
2164  * @param priv
2165  *   Pointer to private structure.
2166  * @param list
2167  *   Pointer to a TAILQ flow list.
2168  */
2169 void
2170 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2171 {
2172         struct rte_flow *flow;
2173
2174         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2175                 unsigned int i;
2176                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2177
2178                 if (flow->drop) {
2179                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2180                                 continue;
2181                         claim_zero(mlx5_glue->destroy_flow
2182                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2183                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2184                         DEBUG("Flow %p removed", (void *)flow);
2185                         /* Next flow. */
2186                         continue;
2187                 }
2188                 /* Verify the flow has not already been cleaned. */
2189                 for (i = 0; i != hash_rxq_init_n; ++i) {
2190                         if (!flow->frxq[i].ibv_flow)
2191                                 continue;
2192                         /*
2193                          * The indirection table may be needed to clear the
2194                          * mark flag in the Rx queues.
2195                          * Grabbing it here avoids another loop over the hash
2196                          * Rx queues later.
2197                          */
2198                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2199                         break;
2200                 }
2201                 if (i == hash_rxq_init_n)
2202                         return;
2203                 if (flow->mark) {
2204                         assert(ind_tbl);
2205                         for (i = 0; i != ind_tbl->queues_n; ++i)
2206                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2207                 }
2208                 for (i = 0; i != hash_rxq_init_n; ++i) {
2209                         if (!flow->frxq[i].ibv_flow)
2210                                 continue;
2211                         claim_zero(mlx5_glue->destroy_flow
2212                                    (flow->frxq[i].ibv_flow));
2213                         flow->frxq[i].ibv_flow = NULL;
2214                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2215                         flow->frxq[i].hrxq = NULL;
2216                 }
2217                 DEBUG("Flow %p removed", (void *)flow);
2218         }
2219 }
2220
2221 /**
2222  * Add all flows.
2223  *
2224  * @param priv
2225  *   Pointer to private structure.
2226  * @param list
2227  *   Pointer to a TAILQ flow list.
2228  *
2229  * @return
2230  *   0 on success, an errno value otherwise and rte_errno is set.
2231  */
2232 int
2233 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2234 {
2235         struct rte_flow *flow;
2236
2237         TAILQ_FOREACH(flow, list, next) {
2238                 unsigned int i;
2239
2240                 if (flow->drop) {
2241                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2242                                 mlx5_glue->create_flow
2243                                 (priv->flow_drop_queue->qp,
2244                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2245                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2246                                 DEBUG("Flow %p cannot be applied",
2247                                       (void *)flow);
2248                                 rte_errno = EINVAL;
2249                                 return rte_errno;
2250                         }
2251                         DEBUG("Flow %p applied", (void *)flow);
2252                         /* Next flow. */
2253                         continue;
2254                 }
2255                 for (i = 0; i != hash_rxq_init_n; ++i) {
2256                         if (!flow->frxq[i].ibv_attr)
2257                                 continue;
2258                         flow->frxq[i].hrxq =
2259                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2260                                                    flow->rss_conf.rss_key_len,
2261                                                    hash_rxq_init[i].hash_fields,
2262                                                    (*flow->queues),
2263                                                    flow->queues_n);
2264                         if (flow->frxq[i].hrxq)
2265                                 goto flow_create;
2266                         flow->frxq[i].hrxq =
2267                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2268                                                    flow->rss_conf.rss_key_len,
2269                                                    hash_rxq_init[i].hash_fields,
2270                                                    (*flow->queues),
2271                                                    flow->queues_n);
2272                         if (!flow->frxq[i].hrxq) {
2273                                 DEBUG("Flow %p cannot be applied",
2274                                       (void *)flow);
2275                                 rte_errno = EINVAL;
2276                                 return rte_errno;
2277                         }
2278 flow_create:
2279                         flow->frxq[i].ibv_flow =
2280                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2281                                                        flow->frxq[i].ibv_attr);
2282                         if (!flow->frxq[i].ibv_flow) {
2283                                 DEBUG("Flow %p cannot be applied",
2284                                       (void *)flow);
2285                                 rte_errno = EINVAL;
2286                                 return rte_errno;
2287                         }
2288                         DEBUG("Flow %p applied", (void *)flow);
2289                 }
2290                 if (!flow->mark)
2291                         continue;
2292                 for (i = 0; i != flow->queues_n; ++i)
2293                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2294         }
2295         return 0;
2296 }
2297
2298 /**
2299  * Verify the flow list is empty.
2300  *
2301  * @param priv
2302  *   Pointer to private structure.
2303  *
2304  * @return The number of flows not released.
2305  */
2306 int
2307 priv_flow_verify(struct priv *priv)
2308 {
2309         struct rte_flow *flow;
2310         int ret = 0;
2311
2312         TAILQ_FOREACH(flow, &priv->flows, next) {
2313                 DEBUG("%p: flow %p still referenced", (void *)priv,
2314                       (void *)flow);
2315                 ++ret;
2316         }
2317         return ret;
2318 }
2319
2320 /**
2321  * Enable a control flow configured from the control plane.
2322  *
2323  * @param dev
2324  *   Pointer to Ethernet device.
2325  * @param eth_spec
2326  *   An Ethernet flow spec to apply.
2327  * @param eth_mask
2328  *   An Ethernet flow mask to apply.
2329  * @param vlan_spec
2330  *   A VLAN flow spec to apply.
2331  * @param vlan_mask
2332  *   A VLAN flow mask to apply.
2333  *
2334  * @return
2335  *   0 on success, an errno value otherwise.
2336  */
2337 int
2338 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2339                     struct rte_flow_item_eth *eth_spec,
2340                     struct rte_flow_item_eth *eth_mask,
2341                     struct rte_flow_item_vlan *vlan_spec,
2342                     struct rte_flow_item_vlan *vlan_mask)
2343 {
2344         struct priv *priv = dev->data->dev_private;
2345         const struct rte_flow_attr attr = {
2346                 .ingress = 1,
2347                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2348         };
2349         struct rte_flow_item items[] = {
2350                 {
2351                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2352                         .spec = eth_spec,
2353                         .last = NULL,
2354                         .mask = eth_mask,
2355                 },
2356                 {
2357                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2358                                 RTE_FLOW_ITEM_TYPE_END,
2359                         .spec = vlan_spec,
2360                         .last = NULL,
2361                         .mask = vlan_mask,
2362                 },
2363                 {
2364                         .type = RTE_FLOW_ITEM_TYPE_END,
2365                 },
2366         };
2367         struct rte_flow_action actions[] = {
2368                 {
2369                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2370                 },
2371                 {
2372                         .type = RTE_FLOW_ACTION_TYPE_END,
2373                 },
2374         };
2375         struct rte_flow *flow;
2376         struct rte_flow_error error;
2377         unsigned int i;
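        /*
         * The local structure mirrors the layout of struct
         * rte_flow_action_rss and provides storage for the queue array filled
         * below.
         */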
2378         union {
2379                 struct rte_flow_action_rss rss;
2380                 struct {
2381                         const struct rte_eth_rss_conf *rss_conf;
2382                         uint16_t num;
2383                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2384                 } local;
2385         } action_rss;
2386
2387         if (!priv->reta_idx_n)
2388                 return EINVAL;
2389         for (i = 0; i != priv->reta_idx_n; ++i)
2390                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2391         action_rss.local.rss_conf = &priv->rss_conf;
2392         action_rss.local.num = priv->reta_idx_n;
2393         actions[0].conf = (const void *)&action_rss.rss;
2394         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2395                                 &error);
2396         if (!flow)
2397                 return rte_errno;
2398         return 0;
2399 }
2400
2401 /**
2402  * Enable a control flow configured from the control plane.
2403  *
2404  * @param dev
2405  *   Pointer to Ethernet device.
2406  * @param eth_spec
2407  *   An Ethernet flow spec to apply.
2408  * @param eth_mask
2409  *   An Ethernet flow mask to apply.
2410  *
2411  * @return
2412  *   0 on success, an errno value otherwise.
2413  */
2414 int
2415 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2416                struct rte_flow_item_eth *eth_spec,
2417                struct rte_flow_item_eth *eth_mask)
2418 {
2419         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2420 }
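/*
 * Illustrative sketch (not part of this file): a caller similar to the
 * traffic enable path could install a broadcast control flow as follows,
 * the "bcast" spec/mask below being an assumption made for the example:
 *
 *        struct rte_flow_item_eth bcast = {
 *                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *        };
 *
 *        claim_zero(mlx5_ctrl_flow(dev, &bcast, &bcast));
 */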
2421
2422 /**
2423  * Destroy a flow.
2424  *
2425  * @see rte_flow_destroy()
2426  * @see rte_flow_ops
2427  */
2428 int
2429 mlx5_flow_destroy(struct rte_eth_dev *dev,
2430                   struct rte_flow *flow,
2431                   struct rte_flow_error *error)
2432 {
2433         struct priv *priv = dev->data->dev_private;
2434
2435         (void)error;
2436         priv_lock(priv);
2437         priv_flow_destroy(priv, &priv->flows, flow);
2438         priv_unlock(priv);
2439         return 0;
2440 }
2441
2442 /**
2443  * Destroy all flows.
2444  *
2445  * @see rte_flow_flush()
2446  * @see rte_flow_ops
2447  */
2448 int
2449 mlx5_flow_flush(struct rte_eth_dev *dev,
2450                 struct rte_flow_error *error)
2451 {
2452         struct priv *priv = dev->data->dev_private;
2453
2454         (void)error;
2455         priv_lock(priv);
2456         priv_flow_flush(priv, &priv->flows);
2457         priv_unlock(priv);
2458         return 0;
2459 }
2460
2461 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2462 /**
2463  * Query flow counter.
2464  *
2465  * @param cs
2466  *   The counter set to query.
2467  * @param counter_stats
2468  *   Last accumulated statistics, used to compute deltas and updated on reset.
2469  *
2470  * @return
2471  *   0 on success, an errno value otherwise and rte_errno is set.
2472  */
2473 static int
2474 priv_flow_query_count(struct ibv_counter_set *cs,
2475                       struct mlx5_flow_counter_stats *counter_stats,
2476                       struct rte_flow_query_count *query_count,
2477                       struct rte_flow_error *error)
2478 {
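        /*
         * counters[0] receives the packet (hits) count and counters[1] the
         * byte count; the reported values are deltas since the last reset
         * recorded in counter_stats.
         */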
2479         uint64_t counters[2];
2480         struct ibv_query_counter_set_attr query_cs_attr = {
2481                 .cs = cs,
2482                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2483         };
2484         struct ibv_counter_set_data query_out = {
2485                 .out = counters,
2486                 .outlen = 2 * sizeof(uint64_t),
2487         };
2488         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2489
2490         if (res) {
2491                 rte_flow_error_set(error, -res,
2492                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2493                                    NULL,
2494                                    "cannot read counter");
2495                 return -res;
2496         }
2497         query_count->hits_set = 1;
2498         query_count->bytes_set = 1;
2499         query_count->hits = counters[0] - counter_stats->hits;
2500         query_count->bytes = counters[1] - counter_stats->bytes;
2501         if (query_count->reset) {
2502                 counter_stats->hits = counters[0];
2503                 counter_stats->bytes = counters[1];
2504         }
2505         return 0;
2506 }
2507
2508 /**
2509  * Query a flow.
2510  *
2511  * @see rte_flow_query()
2512  * @see rte_flow_ops
2513  */
2514 int
2515 mlx5_flow_query(struct rte_eth_dev *dev,
2516                 struct rte_flow *flow,
2517                 enum rte_flow_action_type action __rte_unused,
2518                 void *data,
2519                 struct rte_flow_error *error)
2520 {
2521         struct priv *priv = dev->data->dev_private;
2522         int res = EINVAL;
2523
2524         priv_lock(priv);
2525         if (flow->cs) {
2526                 res = priv_flow_query_count(flow->cs,
2527                                         &flow->counter_stats,
2528                                         (struct rte_flow_query_count *)data,
2529                                         error);
2530         } else {
2531                 rte_flow_error_set(error, res,
2532                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2533                                    NULL,
2534                                    "no counter found for flow");
2535         }
2536         priv_unlock(priv);
2537         return -res;
2538 }
2539 #endif
2540
2541 /**
2542  * Enable or disable isolated mode.
2543  *
2544  * @see rte_flow_isolate()
2545  * @see rte_flow_ops
2546  */
2547 int
2548 mlx5_flow_isolate(struct rte_eth_dev *dev,
2549                   int enable,
2550                   struct rte_flow_error *error)
2551 {
2552         struct priv *priv = dev->data->dev_private;
2553
2554         priv_lock(priv);
2555         if (dev->data->dev_started) {
2556                 rte_flow_error_set(error, EBUSY,
2557                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2558                                    NULL,
2559                                    "port must be stopped first");
2560                 priv_unlock(priv);
2561                 return -rte_errno;
2562         }
2563         priv->isolated = !!enable;
2564         if (enable)
2565                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2566         else
2567                 priv->dev->dev_ops = &mlx5_dev_ops;
2568         priv_unlock(priv);
2569         return 0;
2570 }
2571
2572 /**
2573  * Convert a flow director filter to a generic flow.
2574  *
2575  * @param priv
2576  *   Private structure.
2577  * @param fdir_filter
2578  *   Flow director filter to add.
2579  * @param attributes
2580  *   Generic flow parameters structure.
2581  *
2582  * @return
2583  *   0 on success, errno value on error.
2584  */
2585 static int
2586 priv_fdir_filter_convert(struct priv *priv,
2587                          const struct rte_eth_fdir_filter *fdir_filter,
2588                          struct mlx5_fdir *attributes)
2589 {
2590         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2591
2592         /* Validate queue number. */
2593         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2594                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2595                 return EINVAL;
2596         }
2597         attributes->attr.ingress = 1;
2598         attributes->items[0] = (struct rte_flow_item) {
2599                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2600                 .spec = &attributes->l2,
2601                 .mask = &attributes->l2_mask,
2602         };
2603         switch (fdir_filter->action.behavior) {
2604         case RTE_ETH_FDIR_ACCEPT:
2605                 attributes->actions[0] = (struct rte_flow_action){
2606                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2607                         .conf = &attributes->queue,
2608                 };
2609                 break;
2610         case RTE_ETH_FDIR_REJECT:
2611                 attributes->actions[0] = (struct rte_flow_action){
2612                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2613                 };
2614                 break;
2615         default:
2616                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2617                 return ENOTSUP;
2618         }
2619         attributes->queue.index = fdir_filter->action.rx_queue;
2620         switch (fdir_filter->input.flow_type) {
2621         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2622                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2623                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2624                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2625                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2626                         .type_of_service = input->flow.udp4_flow.ip.tos,
2627                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2628                 };
2629                 attributes->l4.udp.hdr = (struct udp_hdr){
2630                         .src_port = input->flow.udp4_flow.src_port,
2631                         .dst_port = input->flow.udp4_flow.dst_port,
2632                 };
2633                 attributes->items[1] = (struct rte_flow_item){
2634                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2635                         .spec = &attributes->l3,
2636                         .mask = &attributes->l3,
2637                 };
2638                 attributes->items[2] = (struct rte_flow_item){
2639                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2640                         .spec = &attributes->l4,
2641                         .mask = &attributes->l4,
2642                 };
2643                 break;
2644         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2645                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2646                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2647                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2648                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2649                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2650                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2651                 };
2652                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2653                         .src_port = input->flow.tcp4_flow.src_port,
2654                         .dst_port = input->flow.tcp4_flow.dst_port,
2655                 };
2656                 attributes->items[1] = (struct rte_flow_item){
2657                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2658                         .spec = &attributes->l3,
2659                         .mask = &attributes->l3,
2660                 };
2661                 attributes->items[2] = (struct rte_flow_item){
2662                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2663                         .spec = &attributes->l4,
2664                         .mask = &attributes->l4,
2665                 };
2666                 break;
2667         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2668                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2669                         .src_addr = input->flow.ip4_flow.src_ip,
2670                         .dst_addr = input->flow.ip4_flow.dst_ip,
2671                         .time_to_live = input->flow.ip4_flow.ttl,
2672                         .type_of_service = input->flow.ip4_flow.tos,
2673                         .next_proto_id = input->flow.ip4_flow.proto,
2674                 };
2675                 attributes->items[1] = (struct rte_flow_item){
2676                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2677                         .spec = &attributes->l3,
2678                         .mask = &attributes->l3,
2679                 };
2680                 break;
2681         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2682                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2683                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2684                         .proto = input->flow.udp6_flow.ip.proto,
2685                 };
2686                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2687                        input->flow.udp6_flow.ip.src_ip,
2688                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2689                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2690                        input->flow.udp6_flow.ip.dst_ip,
2691                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2692                 attributes->l4.udp.hdr = (struct udp_hdr){
2693                         .src_port = input->flow.udp6_flow.src_port,
2694                         .dst_port = input->flow.udp6_flow.dst_port,
2695                 };
2696                 attributes->items[1] = (struct rte_flow_item){
2697                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2698                         .spec = &attributes->l3,
2699                         .mask = &attributes->l3,
2700                 };
2701                 attributes->items[2] = (struct rte_flow_item){
2702                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2703                         .spec = &attributes->l4,
2704                         .mask = &attributes->l4,
2705                 };
2706                 break;
2707         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2708                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2709                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2710                         .proto = input->flow.tcp6_flow.ip.proto,
2711                 };
2712                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2713                        input->flow.tcp6_flow.ip.src_ip,
2714                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2715                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2716                        input->flow.tcp6_flow.ip.dst_ip,
2717                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2718                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2719                         .src_port = input->flow.tcp6_flow.src_port,
2720                         .dst_port = input->flow.tcp6_flow.dst_port,
2721                 };
2722                 attributes->items[1] = (struct rte_flow_item){
2723                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2724                         .spec = &attributes->l3,
2725                         .mask = &attributes->l3,
2726                 };
2727                 attributes->items[2] = (struct rte_flow_item){
2728                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2729                         .spec = &attributes->l4,
2730                         .mask = &attributes->l4,
2731                 };
2732                 break;
2733         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2734                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2735                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2736                         .proto = input->flow.ipv6_flow.proto,
2737                 };
2738                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2739                        input->flow.ipv6_flow.src_ip,
2740                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2741                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2742                        input->flow.ipv6_flow.dst_ip,
2743                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2744                 attributes->items[1] = (struct rte_flow_item){
2745                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2746                         .spec = &attributes->l3,
2747                         .mask = &attributes->l3,
2748                 };
2749                 break;
2750         default:
2751         ERROR("invalid flow type %d",
2752                       fdir_filter->input.flow_type);
2753                 return ENOTSUP;
2754         }
2755         return 0;
2756 }
2757
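/*
 * Usage sketch (illustrative only, all values hypothetical): an input of
 * type RTE_ETH_FLOW_NONFRAG_IPV6_TCP takes the branch above and produces
 * items[1] = IPV6 and items[2] = TCP, with spec and mask both pointing at
 * the l3/l4 unions filled from the tcp6_flow fields.
 *
 *	struct rte_eth_fdir_filter example = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_TCP,
 *			.flow.tcp6_flow = {
 *				.ip = {
 *					.proto = IPPROTO_TCP,
 *					.hop_limits = 64,
 *				},
 *				.src_port = rte_cpu_to_be_16(1024),
 *				.dst_port = rte_cpu_to_be_16(80),
 *			},
 *		},
 *	};
 */
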
2758 /**
2759  * Add a new flow director filter and store it in the list.
2760  *
2761  * @param priv
2762  *   Private structure.
2763  * @param fdir_filter
2764  *   Flow director filter to add.
2765  *
2766  * @return
2767  *   0 on success, errno value on failure.
2768  */
2769 static int
2770 priv_fdir_filter_add(struct priv *priv,
2771                      const struct rte_eth_fdir_filter *fdir_filter)
2772 {
2773         struct mlx5_fdir attributes = {
2774                 .attr.group = 0,
2775                 .l2_mask = {
2776                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2777                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2778                         .type = 0,
2779                 },
2780         };
2781         struct mlx5_flow_parse parser = {
2782                 .layer = HASH_RXQ_ETH,
2783         };
2784         struct rte_flow_error error;
2785         struct rte_flow *flow;
2786         int ret;
2787
2788         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2789         if (ret)
2790                 return -ret;
2791         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2792                                 attributes.actions, &error, &parser);
2793         if (ret)
2794                 return -ret;
2795         flow = priv_flow_create(priv,
2796                                 &priv->flows,
2797                                 &attributes.attr,
2798                                 attributes.items,
2799                                 attributes.actions,
2800                                 &error);
2801         if (flow) {
2802                 DEBUG("FDIR created %p", (void *)flow);
2803                 return 0;
2804         }
2805         return ENOTSUP;
2806 }
2807
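/*
 * Illustrative call path (hypothetical application snippet, error handling
 * omitted): an FDIR add request reaches priv_fdir_filter_add() through the
 * ethdev filter API, which lands in mlx5_dev_filter_ctrl() further below.
 *
 *	struct rte_eth_fdir_filter filter = { ... };
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &filter);
 */
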
2808 /**
2809  * Delete a specific filter.
2810  *
2811  * @param priv
2812  *   Private structure.
2813  * @param fdir_filter
2814  *   Filter to be deleted.
2815  *
2816  * @return
2817  *   0 on success, errno value on failure.
2818  */
2819 static int
2820 priv_fdir_filter_delete(struct priv *priv,
2821                         const struct rte_eth_fdir_filter *fdir_filter)
2822 {
2823         struct mlx5_fdir attributes = {
2824                 .attr.group = 0,
2825         };
2826         struct mlx5_flow_parse parser = {
2827                 .create = 1,
2828                 .layer = HASH_RXQ_ETH,
2829         };
2830         struct rte_flow_error error;
2831         struct rte_flow *flow;
2832         unsigned int i;
2833         int ret;
2834
2835         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2836         if (ret)
2837                 return -ret;
2838         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2839                                 attributes.actions, &error, &parser);
2840         if (ret)
2841                 goto exit;
2842         /*
2843          * Special case for the drop action, which is only added to the
2844          * specifications when a flow is created; it is therefore missing
2845          * here and must be appended before comparing with existing flows.
2846          */
2847         if (parser.drop) {
2848                 struct ibv_flow_spec_action_drop *drop;
2849
2850                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2851                                 parser.queue[HASH_RXQ_ETH].offset);
2852                 *drop = (struct ibv_flow_spec_action_drop){
2853                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2854                         .size = sizeof(struct ibv_flow_spec_action_drop),
2855                 };
2856                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2857         }
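        /*
         * Look for a previously created flow whose Verbs attribute and every
         * specification match the ones rebuilt above: compare the
         * ibv_flow_attr first, then walk both specification lists header by
         * header using the per-spec size fields.
         */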
2858         TAILQ_FOREACH(flow, &priv->flows, next) {
2859                 struct ibv_flow_attr *attr;
2860                 struct ibv_spec_header *attr_h;
2861                 void *spec;
2862                 struct ibv_flow_attr *flow_attr;
2863                 struct ibv_spec_header *flow_h;
2864                 void *flow_spec;
2865                 unsigned int specs_n;
2866
2867                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2868                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2869                 /* Compare the attributes first. */
2870                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2871                         continue;
2872                 if (attr->num_of_specs == 0)
2873                         continue;
2874                 spec = (void *)((uintptr_t)attr +
2875                                 sizeof(struct ibv_flow_attr));
2876                 flow_spec = (void *)((uintptr_t)flow_attr +
2877                                      sizeof(struct ibv_flow_attr));
2878                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2879                 for (i = 0; i != specs_n; ++i) {
2880                         attr_h = spec;
2881                         flow_h = flow_spec;
2882                         if (memcmp(spec, flow_spec,
2883                                    RTE_MIN(attr_h->size, flow_h->size)))
2884                                 goto wrong_flow;
2885                         spec = (void *)((uintptr_t)spec + attr_h->size);
2886                         flow_spec = (void *)((uintptr_t)flow_spec +
2887                                              flow_h->size);
2888                 }
2889                 /* At this point, the flow matches. */
2890                 break;
2891 wrong_flow:
2892                 /* The flow does not match. */
2893                 continue;
2894         }
2895         if (flow)
2896                 priv_flow_destroy(priv, &priv->flows, flow);
2897 exit:
2898         for (i = 0; i != hash_rxq_init_n; ++i) {
2899                 if (parser.queue[i].ibv_attr)
2900                         rte_free(parser.queue[i].ibv_attr);
2901         }
2902         return -ret;
2903 }
2904
2905 /**
2906  * Update a specific filter by deleting and re-adding it.
2907  *
2908  * @param priv
2909  *   Private structure.
2910  * @param fdir_filter
2911  *   Filter to be updated.
2912  *
2913  * @return
2914  *   0 on success, errno value on failure.
2915  */
2916 static int
2917 priv_fdir_filter_update(struct priv *priv,
2918                         const struct rte_eth_fdir_filter *fdir_filter)
2919 {
2920         int ret;
2921
2922         ret = priv_fdir_filter_delete(priv, fdir_filter);
2923         if (ret)
2924                 return ret;
2925         ret = priv_fdir_filter_add(priv, fdir_filter);
2926         return ret;
2927 }
2928
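/*
 * Note: the update above is delete followed by add and is not atomic; if
 * the delete step fails the original filter is left untouched, while a
 * failure in the add step leaves the filter removed.
 */
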
2929 /**
2930  * Flush all filters.
2931  *
2932  * @param priv
2933  *   Private structure.
2934  */
2935 static void
2936 priv_fdir_filter_flush(struct priv *priv)
2937 {
2938         priv_flow_flush(priv, &priv->flows);
2939 }
2940
2941 /**
2942  * Get flow director information.
2943  *
2944  * @param priv
2945  *   Private structure.
2946  * @param[out] fdir_info
2947  *   Resulting flow director information.
2948  */
2949 static void
2950 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2951 {
2952         struct rte_eth_fdir_masks *mask =
2953                 &priv->dev->data->dev_conf.fdir_conf.mask;
2954
2955         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2956         fdir_info->guarant_spc = 0;
2957         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2958         fdir_info->max_flexpayload = 0;
2959         fdir_info->flow_types_mask[0] = 0;
2960         fdir_info->flex_payload_unit = 0;
2961         fdir_info->max_flex_payload_segment_num = 0;
2962         fdir_info->flex_payload_limit = 0;
2963         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2964 }
2965
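/*
 * Note: priv_fdir_info_get() only reports the configured mode and masks;
 * guaranteed space, flexible payload parameters and the flow type bitmask
 * are all reported as zero.
 */
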
2966 /**
2967  * Deal with flow director operations.
2968  *
2969  * @param priv
2970  *   Pointer to private structure.
2971  * @param filter_op
2972  *   Operation to perform.
2973  * @param arg
2974  *   Pointer to operation-specific structure.
2975  *
2976  * @return
2977  *   0 on success, errno value on failure.
2978  */
2979 static int
2980 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2981 {
2982         enum rte_fdir_mode fdir_mode =
2983                 priv->dev->data->dev_conf.fdir_conf.mode;
2984         int ret = 0;
2985
2986         if (filter_op == RTE_ETH_FILTER_NOP)
2987                 return 0;
2988         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2989             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2990                 ERROR("%p: flow director mode %d not supported",
2991                       (void *)priv, fdir_mode);
2992                 return EINVAL;
2993         }
2994         switch (filter_op) {
2995         case RTE_ETH_FILTER_ADD:
2996                 ret = priv_fdir_filter_add(priv, arg);
2997                 break;
2998         case RTE_ETH_FILTER_UPDATE:
2999                 ret = priv_fdir_filter_update(priv, arg);
3000                 break;
3001         case RTE_ETH_FILTER_DELETE:
3002                 ret = priv_fdir_filter_delete(priv, arg);
3003                 break;
3004         case RTE_ETH_FILTER_FLUSH:
3005                 priv_fdir_filter_flush(priv);
3006                 break;
3007         case RTE_ETH_FILTER_INFO:
3008                 priv_fdir_info_get(priv, arg);
3009                 break;
3010         default:
3011                 DEBUG("%p: unknown operation %u", (void *)priv,
3012                       filter_op);
3013                 ret = EINVAL;
3014                 break;
3015         }
3016         return ret;
3017 }
3018
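/*
 * Note: the priv_fdir_* helpers above follow the "0 or positive errno"
 * convention documented in their headers; mlx5_dev_filter_ctrl() below
 * negates the result once so applications see negative errno values.
 */
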
3019 /**
3020  * Manage filter operations.
3021  *
3022  * @param dev
3023  *   Pointer to Ethernet device structure.
3024  * @param filter_type
3025  *   Filter type.
3026  * @param filter_op
3027  *   Operation to perform.
3028  * @param arg
3029  *   Pointer to operation-specific structure.
3030  *
3031  * @return
3032  *   0 on success, negative errno value on failure.
3033  */
3034 int
3035 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3036                      enum rte_filter_type filter_type,
3037                      enum rte_filter_op filter_op,
3038                      void *arg)
3039 {
3040         int ret = EINVAL;
3041         struct priv *priv = dev->data->dev_private;
3042
3043         switch (filter_type) {
3044         case RTE_ETH_FILTER_GENERIC:
3045                 if (filter_op != RTE_ETH_FILTER_GET)
3046                         return -EINVAL;
3047                 *(const void **)arg = &mlx5_flow_ops;
3048                 return 0;
3049         case RTE_ETH_FILTER_FDIR:
3050                 priv_lock(priv);
3051                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3052                 priv_unlock(priv);
3053                 break;
3054         default:
3055                 ERROR("%p: filter type (%d) not supported",
3056                       (void *)dev, filter_type);
3057                 break;
3058         }
3059         return -ret;
3060 }
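
/*
 * Illustrative usage (hypothetical application snippet): the generic filter
 * type is how rte_flow retrieves the mlx5_flow_ops table, while flow
 * director requests are dispatched to the priv_fdir_* helpers under the
 * port lock.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *	struct rte_eth_fdir_filter filter = { ... };
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &filter);
 */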