net/mlx5: use SPDX tags in 6WIND copyrighted files
dpdk.git: drivers/net/mlx5/mlx5_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
        int dummy;
};
#endif
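/*
 * The dummy definition above only keeps
 * sizeof(struct ibv_flow_spec_counter_action) usable in the offset
 * computations below when the Verbs counter extension is missing; the
 * COUNT action itself is compiled out in that case (see valid_actions[]).
 */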

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
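/*
 * Reading hash_rxq_init[]: the more specific the layer, the lower its
 * flow_priority value (0 binds tightest). For example, an ETH/IPV4/UDP
 * pattern maps to HASH_RXQ_UDPV4, hashing on both IP addresses and UDP
 * ports, while a bare ETH/IPV4 pattern maps to HASH_RXQ_IPV4 (priority 1)
 * and an ETH-only pattern falls back to HASH_RXQ_ETH (priority 2, no
 * hashing).
 */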

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
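/*
 * Example: ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands
 * to a compound literal array terminated by RTE_FLOW_ITEM_TYPE_END, which
 * is how the .items lists in mlx5_flow_items[] below are built.
 */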

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                                .time_to_live = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
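/*
 * Walking the graph: validation starts at [RTE_FLOW_ITEM_TYPE_END] and only
 * accepts a pattern whose next item appears in the current node's .items
 * list. A pattern such as ETH -> IPV4 -> UDP -> VXLAN -> ETH is therefore
 * valid (VXLAN re-enters the graph at ETH for the inner headers), while
 * e.g. IPV4 -> ETH is rejected.
 */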

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};
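/*
 * Every Verbs flow specification (ibv_flow_spec_eth, _ipv4_ext, _tcp_udp,
 * ...) starts with the same type/size pair, so this header makes it
 * possible to walk a chain of heterogeneous specs appended after an
 * ibv_flow_attr by advancing by "size" bytes at each step.
 */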

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                /* Validate the user-provided mask, not the spec. */
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
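/*
 * Example: for a UDP item whose supported mask only covers src_port and
 * dst_port, a spec setting hdr.dgram_len fails the
 * (spec[i] | mask[i]) != mask[i] test above, because it requests a match
 * on bytes the NIC cannot filter on.
 */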

/**
 * Copy the RSS configuration from the user-provided one.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save, or NULL to fall back to the port
 *   configuration.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        const struct rte_eth_rss_conf *rss;

        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                rss = rss_conf;
        } else {
                rss = &priv->rss_conf;
        }
        if (rss->rss_key_len > 40)
                return EINVAL;
        parser->rss_conf.rss_key_len = rss->rss_key_len;
        parser->rss_conf.rss_hf = rss->rss_hf;
        memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
        parser->rss_conf.rss_key = parser->rss_key;
        return 0;
}
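/*
 * The 40-byte limit matches the size of parser->rss_key (and of
 * rte_flow::rss_key), presumably the Toeplitz hash key length supported by
 * the device; the parser keeps its own copy so the configuration remains
 * valid after the user buffer goes away.
 */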

/**
 * Extract the attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}
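/*
 * In other words, the only attributes this PMD accepts at this point are
 * ingress rules in group 0, with priority either 0 or
 * MLX5_CTRL_FLOW_PRIORITY, e.g.:
 *
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 */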

/**
 * Extract the actions into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add the default RSS configuration: Verbs needs it to create the
         * QP even when no RSS is requested.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index >= priv->rxqs_n))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}
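/*
 * Note the fix-ups after the action loop above: a MARK combined with DROP
 * is silently discarded (a dropped packet can never deliver its mark to
 * the application), and a rule is only valid if it ends up with either a
 * drop target or at least one Rx queue.
 */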

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after the verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop || parser->queues_n == 1) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}
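/*
 * Worked example of the offset accounting above: a drop rule matching
 * ETH / IPV4 / UDP reserves, in parser->queue[HASH_RXQ_ETH].offset,
 * sizeof(struct ibv_flow_attr) + sizeof(struct ibv_flow_spec_eth) +
 * sizeof(struct ibv_flow_spec_ipv4_ext) +
 * sizeof(struct ibv_flow_spec_tcp_udp) +
 * sizeof(struct ibv_flow_spec_action_drop) bytes; priv_flow_convert()
 * later allocates exactly that much and replays the items to fill it.
 */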

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * The generic Ethernet attribute becomes useless once the
                 * pattern defines more specific layers.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                            (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}
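/*
 * Example: for an RSS flow whose pattern stops at ETH / IPV4 and whose RSS
 * configuration includes TCP and UDP hashing, the loop above appends a
 * bare TCP or UDP spec (type and size only, no match values) to the
 * HASH_RXQ_TCPV4 and HASH_RXQ_UDPV4 attributes, so each hash Rx queue ends
 * up with a complete ETH/IPV4/L4 specification chain.
 */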

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop || parser->queues_n == 1) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
        if (parser->queues_n > 1) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                /*
                 * Single-queue and drop actions have their priority
                 * overridden with the Ethernet priority; adjust it to the
                 * priority of the most specific layer instead.
                 */
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
        if (parser->allmulti &&
            parser->layer == HASH_RXQ_ETH) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (parser->queue[i].ibv_attr->num_of_specs != 1)
                                break;
                        parser->queue[i].ibv_attr->type =
                                                IBV_FLOW_ATTR_MC_DEFAULT;
                }
        }
exit_free:
        /* If only validation was requested, all resources must be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter");
        return rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                        parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
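        /*
         * The least significant bit of the first destination MAC octet is
         * the IEEE 802 group (multicast) address bit, which also covers the
         * all-multicast case this flag is named after.
         */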
        parser->allmulti = eth.val.dst_mac[0] & 1;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

        if (spec) {
                unsigned int i;
                if (!mask)
                        mask = default_mask;

                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;

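                        /*
                         * dst_sz for VLAN is 0 in mlx5_flow_items[], so the
                         * TCI is folded into the Ethernet specification
                         * emitted just before, found back at
                         * (current offset - eth_size).
                         */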
                        eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset - eth_size);
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
                }
        }
        return 0;
}
1293
1294 /**
1295  * Convert IPv4 item to Verbs specification.
1296  *
1297  * @param item[in]
1298  *   Item specification.
1299  * @param default_mask[in]
1300  *   Default bit-masks to use when item->mask is not provided.
1301  * @param data[in, out]
1302  *   User structure.
1303  */
1304 static int
1305 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1306                       const void *default_mask,
1307                       void *data)
1308 {
1309         const struct rte_flow_item_ipv4 *spec = item->spec;
1310         const struct rte_flow_item_ipv4 *mask = item->mask;
1311         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1312         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1313         struct ibv_flow_spec_ipv4_ext ipv4 = {
1314                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1315                 .size = ipv4_size,
1316         };
1317
1318         /* Don't update layer for the inner pattern. */
1319         if (!parser->inner)
1320                 parser->layer = HASH_RXQ_IPV4;
1321         if (spec) {
1322                 if (!mask)
1323                         mask = default_mask;
1324                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1325                         .src_ip = spec->hdr.src_addr,
1326                         .dst_ip = spec->hdr.dst_addr,
1327                         .proto = spec->hdr.next_proto_id,
1328                         .tos = spec->hdr.type_of_service,
1329                 };
1330                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1331                         .src_ip = mask->hdr.src_addr,
1332                         .dst_ip = mask->hdr.dst_addr,
1333                         .proto = mask->hdr.next_proto_id,
1334                         .tos = mask->hdr.type_of_service,
1335                 };
1336                 /* Remove unwanted bits from values. */
1337                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1338                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1339                 ipv4.val.proto &= ipv4.mask.proto;
1340                 ipv4.val.tos &= ipv4.mask.tos;
1341         }
1342         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1343         return 0;
1344 }
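
/*
 * Worked example of the masking above (illustrative only): matching the
 * 192.168.0.0/24 source subnet. With spec 192.168.0.1 and mask
 * 255.255.255.0, "val &= mask" stores 192.168.0.0, so every host of the
 * /24 matches the rule.
 *
 *	struct rte_flow_item_ipv4 ip_spec = {
 *		.hdr.src_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *	};
 *	struct rte_flow_item_ipv4 ip_mask = {
 *		.hdr.src_addr = rte_cpu_to_be_32(0xffffff00),
 *	};
 */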
1345
1346 /**
1347  * Convert IPv6 item to Verbs specification.
1348  *
1349  * @param[in] item
1350  *   Item specification.
1351  * @param[in] default_mask
1352  *   Default bit-masks to use when item->mask is not provided.
1353  * @param[in, out] data
1354  *   User structure.
1355  */
1356 static int
1357 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1358                       const void *default_mask,
1359                       void *data)
1360 {
1361         const struct rte_flow_item_ipv6 *spec = item->spec;
1362         const struct rte_flow_item_ipv6 *mask = item->mask;
1363         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1364         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1365         struct ibv_flow_spec_ipv6 ipv6 = {
1366                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1367                 .size = ipv6_size,
1368         };
1369
1370         /* Don't update layer for the inner pattern. */
1371         if (!parser->inner)
1372                 parser->layer = HASH_RXQ_IPV6;
1373         if (spec) {
1374                 unsigned int i;
1375                 uint32_t vtc_flow_val;
1376                 uint32_t vtc_flow_mask;
1377
1378                 if (!mask)
1379                         mask = default_mask;
1380                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1381                        RTE_DIM(ipv6.val.src_ip));
1382                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1383                        RTE_DIM(ipv6.val.dst_ip));
1384                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1385                        RTE_DIM(ipv6.mask.src_ip));
1386                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1387                        RTE_DIM(ipv6.mask.dst_ip));
1388                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1389                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1390                 ipv6.val.flow_label =
1391                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1392                                          IPV6_HDR_FL_SHIFT);
1393                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1394                                          IPV6_HDR_TC_SHIFT;
1395                 ipv6.val.next_hdr = spec->hdr.proto;
1396                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1397                 ipv6.mask.flow_label =
1398                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1399                                          IPV6_HDR_FL_SHIFT);
1400                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1401                                           IPV6_HDR_TC_SHIFT;
1402                 ipv6.mask.next_hdr = mask->hdr.proto;
1403                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1404                 /* Remove unwanted bits from values. */
1405                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1406                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1407                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1408                 }
1409                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1410                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1411                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1412                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1413         }
1414         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1415         return 0;
1416 }
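
/*
 * Layout of the IPv6 vtc_flow word decoded above (RFC 2460), once
 * converted to host order: bits 31-28 hold the version, bits 27-20 the
 * traffic class, bits 19-0 the flow label. Worked example:
 *
 *	uint32_t vtc_flow = 0x600abcde;
 *	uint32_t tclass = (vtc_flow >> 20) & 0xff;	yields 0x00
 *	uint32_t flabel = vtc_flow & 0xfffff;		yields 0xabcde
 *
 * which is what the IPV6_HDR_TC_* and IPV6_HDR_FL_* macros compute before
 * the flow label is converted back to network order for Verbs.
 */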
1417
1418 /**
1419  * Convert UDP item to Verbs specification.
1420  *
1421  * @param[in] item
1422  *   Item specification.
1423  * @param[in] default_mask
1424  *   Default bit-masks to use when item->mask is not provided.
1425  * @param[in, out] data
1426  *   User structure.
1427  */
1428 static int
1429 mlx5_flow_create_udp(const struct rte_flow_item *item,
1430                      const void *default_mask,
1431                      void *data)
1432 {
1433         const struct rte_flow_item_udp *spec = item->spec;
1434         const struct rte_flow_item_udp *mask = item->mask;
1435         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1436         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1437         struct ibv_flow_spec_tcp_udp udp = {
1438                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1439                 .size = udp_size,
1440         };
1441
1442         /* Don't update layer for the inner pattern. */
1443         if (!parser->inner) {
1444                 if (parser->layer == HASH_RXQ_IPV4)
1445                         parser->layer = HASH_RXQ_UDPV4;
1446                 else
1447                         parser->layer = HASH_RXQ_UDPV6;
1448         }
1449         if (spec) {
1450                 if (!mask)
1451                         mask = default_mask;
1452                 udp.val.dst_port = spec->hdr.dst_port;
1453                 udp.val.src_port = spec->hdr.src_port;
1454                 udp.mask.dst_port = mask->hdr.dst_port;
1455                 udp.mask.src_port = mask->hdr.src_port;
1456                 /* Remove unwanted bits from values. */
1457                 udp.val.src_port &= udp.mask.src_port;
1458                 udp.val.dst_port &= udp.mask.dst_port;
1459         }
1460         mlx5_flow_create_copy(parser, &udp, udp_size);
1461         return 0;
1462 }
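
/*
 * Usage sketch (hypothetical application code): a UDP item matching
 * destination port 4789 only. rte_flow carries ports in network byte
 * order; leaving src_port zero in both spec and mask wildcards it. The
 * TCP converter below behaves identically.
 *
 *	struct rte_flow_item_udp udp_spec = {
 *		.hdr.dst_port = rte_cpu_to_be_16(4789),
 *	};
 *	struct rte_flow_item_udp udp_mask = {
 *		.hdr.dst_port = rte_cpu_to_be_16(0xffff),
 *	};
 */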
1463
1464 /**
1465  * Convert TCP item to Verbs specification.
1466  *
1467  * @param[in] item
1468  *   Item specification.
1469  * @param[in] default_mask
1470  *   Default bit-masks to use when item->mask is not provided.
1471  * @param[in, out] data
1472  *   User structure.
1473  */
1474 static int
1475 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1476                      const void *default_mask,
1477                      void *data)
1478 {
1479         const struct rte_flow_item_tcp *spec = item->spec;
1480         const struct rte_flow_item_tcp *mask = item->mask;
1481         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1482         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1483         struct ibv_flow_spec_tcp_udp tcp = {
1484                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1485                 .size = tcp_size,
1486         };
1487
1488         /* Don't update layer for the inner pattern. */
1489         if (!parser->inner) {
1490                 if (parser->layer == HASH_RXQ_IPV4)
1491                         parser->layer = HASH_RXQ_TCPV4;
1492                 else
1493                         parser->layer = HASH_RXQ_TCPV6;
1494         }
1495         if (spec) {
1496                 if (!mask)
1497                         mask = default_mask;
1498                 tcp.val.dst_port = spec->hdr.dst_port;
1499                 tcp.val.src_port = spec->hdr.src_port;
1500                 tcp.mask.dst_port = mask->hdr.dst_port;
1501                 tcp.mask.src_port = mask->hdr.src_port;
1502                 /* Remove unwanted bits from values. */
1503                 tcp.val.src_port &= tcp.mask.src_port;
1504                 tcp.val.dst_port &= tcp.mask.dst_port;
1505         }
1506         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1507         return 0;
1508 }
1509
1510 /**
1511  * Convert VXLAN item to Verbs specification.
1512  *
1513  * @param[in] item
1514  *   Item specification.
1515  * @param[in] default_mask
1516  *   Default bit-masks to use when item->mask is not provided.
1517  * @param[in, out] data
1518  *   User structure.
1519  */
1520 static int
1521 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1522                        const void *default_mask,
1523                        void *data)
1524 {
1525         const struct rte_flow_item_vxlan *spec = item->spec;
1526         const struct rte_flow_item_vxlan *mask = item->mask;
1527         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1528         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1529         struct ibv_flow_spec_tunnel vxlan = {
1530                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1531                 .size = size,
1532         };
1533         union vni {
1534                 uint32_t vlan_id;
1535                 uint8_t vni[4];
1536         } id;
1537
1538         id.vni[0] = 0;
1539         parser->inner = IBV_FLOW_SPEC_INNER;
1540         if (spec) {
1541                 if (!mask)
1542                         mask = default_mask;
1543                 memcpy(&id.vni[1], spec->vni, 3);
1544                 vxlan.val.tunnel_id = id.vlan_id;
1545                 memcpy(&id.vni[1], mask->vni, 3);
1546                 vxlan.mask.tunnel_id = id.vlan_id;
1547                 /* Remove unwanted bits from values. */
1548                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1549         }
1550         /*
1551          * A tunnel ID of 0 is equivalent to not adding a VXLAN layer: if it
1552          * is the only layer defined in the Verbs specification, it is
1553          * interpreted as a wildcard and every packet matches the rule; if it
1554          * follows a full stack (e.g. eth / ipv4 / udp), every packet matching
1555          * the preceding layers also matches the rule.
1556          * To avoid such situations, VNI 0 is currently refused.
1557          */
1558         if (!vxlan.val.tunnel_id)
1559                 return EINVAL;
1560         mlx5_flow_create_copy(parser, &vxlan, size);
1561         return 0;
1562 }
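
/*
 * Worked example of the VNI packing above (illustrative only). The 24-bit
 * VXLAN VNI is copied into bytes 1..3 of the 32-bit tunnel_id while byte 0
 * stays zero, producing the identifier in network byte order. For VNI
 * 0x123456:
 *
 *	spec->vni     = { 0x12, 0x34, 0x56 }
 *	id.vni        = { 0x00, 0x12, 0x34, 0x56 }
 *	val.tunnel_id = 0x00123456 (big endian)
 */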
1563
1564 /**
1565  * Convert mark/flag action to Verbs specification.
1566  *
1567  * @param parser
1568  *   Internal parser structure.
1569  * @param mark_id
1570  *   Mark identifier.
1571  */
1572 static int
1573 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1574 {
1575         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1576         struct ibv_flow_spec_action_tag tag = {
1577                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1578                 .size = size,
1579                 .tag_id = mlx5_flow_mark_set(mark_id),
1580         };
1581
1582         assert(parser->mark);
1583         mlx5_flow_create_copy(parser, &tag, size);
1584         return 0;
1585 }
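
/*
 * Usage sketch (hypothetical application code): a MARK action tagging
 * matched packets with the value 42, which the receive path reads back
 * from the mbuf once PKT_RX_FDIR_ID is set in ol_flags.
 *
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	...
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		mark_value = mbuf->hash.fdir.hi;
 */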
1586
1587 /**
1588  * Convert count action to Verbs specification.
1589  *
1590  * @param priv
1591  *   Pointer to private structure.
1592  * @param parser
1593  *   Pointer to MLX5 flow parser structure.
1594  *
1595  * @return
1596  *   0 on success, errno value on failure.
1597  */
1598 static int
1599 mlx5_flow_create_count(struct priv *priv __rte_unused,
1600                        struct mlx5_flow_parse *parser __rte_unused)
1601 {
1602 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1603         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1604         struct ibv_counter_set_init_attr init_attr = {0};
1605         struct ibv_flow_spec_counter_action counter = {
1606                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1607                 .size = size,
1608                 .counter_set_handle = 0,
1609         };
1610
1611         init_attr.counter_set_id = 0;
1612         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1613         if (!parser->cs)
1614                 return EINVAL;
1615         counter.counter_set_handle = parser->cs->handle;
1616         mlx5_flow_create_copy(parser, &counter, size);
1617 #endif
1618         return 0;
1619 }
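
/*
 * Usage sketch (hypothetical application code, assuming counter-set
 * support is compiled in): attaching a COUNT action and reading it back
 * through the query path implemented by mlx5_flow_query() below.
 *
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_query_count stats = { .reset = 1 };
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &stats, &error) && stats.hits_set)
 *		printf("hits: %" PRIu64 "\n", stats.hits);
 */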
1620
1621 /**
1622  * Complete flow rule creation with a drop queue.
1623  *
1624  * @param priv
1625  *   Pointer to private structure.
1626  * @param parser
1627  *   Internal parser structure.
1628  * @param flow
1629  *   Pointer to the rte_flow.
1630  * @param[out] error
1631  *   Perform verbose error reporting if not NULL.
1632  *
1633  * @return
1634  *   0 on success, errno value on failure.
1635  */
1636 static int
1637 priv_flow_create_action_queue_drop(struct priv *priv,
1638                                    struct mlx5_flow_parse *parser,
1639                                    struct rte_flow *flow,
1640                                    struct rte_flow_error *error)
1641 {
1642         struct ibv_flow_spec_action_drop *drop;
1643         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1644         int err = 0;
1645
1646         assert(priv->pd);
1647         assert(priv->ctx);
1648         flow->drop = 1;
1649         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1650                         parser->queue[HASH_RXQ_ETH].offset);
1651         *drop = (struct ibv_flow_spec_action_drop){
1652                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1653                         .size = size,
1654         };
1655         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1656         parser->queue[HASH_RXQ_ETH].offset += size;
1657         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1658                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1659         if (parser->count)
1660                 flow->cs = parser->cs;
1661         if (!priv->dev->data->dev_started)
1662                 return 0;
1663         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1664         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1665                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1666                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1667         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1668                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1669                                    NULL, "flow rule creation failure");
1670                 err = ENOMEM;
1671                 goto error;
1672         }
1673         return 0;
1674 error:
1675         assert(flow);
1676         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1677                 claim_zero(mlx5_glue->destroy_flow
1678                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1679                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1680         }
1681         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1682                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1683                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1684         }
1685         if (flow->cs) {
1686                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1687                 flow->cs = NULL;
1688                 parser->cs = NULL;
1689         }
1690         return err;
1691 }
1692
1693 /**
1694  * Create hash Rx queues when RSS is enabled.
1695  *
1696  * @param priv
1697  *   Pointer to private structure.
1698  * @param parser
1699  *   Internal parser structure.
1700  * @param flow
1701  *   Pointer to the rte_flow.
1702  * @param[out] error
1703  *   Perform verbose error reporting if not NULL.
1704  *
1705  * @return
1706  *   0 on success, an errno value otherwise and rte_errno is set.
1707  */
1708 static int
1709 priv_flow_create_action_queue_rss(struct priv *priv,
1710                                   struct mlx5_flow_parse *parser,
1711                                   struct rte_flow *flow,
1712                                   struct rte_flow_error *error)
1713 {
1714         unsigned int i;
1715
1716         for (i = 0; i != hash_rxq_init_n; ++i) {
1717                 uint64_t hash_fields;
1718
1719                 if (!parser->queue[i].ibv_attr)
1720                         continue;
1721                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1722                 parser->queue[i].ibv_attr = NULL;
1723                 hash_fields = hash_rxq_init[i].hash_fields;
1724                 if (!priv->dev->data->dev_started)
1725                         continue;
1726                 flow->frxq[i].hrxq =
1727                         mlx5_priv_hrxq_get(priv,
1728                                            parser->rss_conf.rss_key,
1729                                            parser->rss_conf.rss_key_len,
1730                                            hash_fields,
1731                                            parser->queues,
1732                                            parser->queues_n);
1733                 if (flow->frxq[i].hrxq)
1734                         continue;
1735                 flow->frxq[i].hrxq =
1736                         mlx5_priv_hrxq_new(priv,
1737                                            parser->rss_conf.rss_key,
1738                                            parser->rss_conf.rss_key_len,
1739                                            hash_fields,
1740                                            parser->queues,
1741                                            parser->queues_n);
1742                 if (!flow->frxq[i].hrxq) {
1743                         rte_flow_error_set(error, ENOMEM,
1744                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1745                                            NULL, "cannot create hash rxq");
1746                         return ENOMEM;
1747                 }
1748         }
1749         return 0;
1750 }
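
/*
 * Usage sketch (hypothetical application code, rss_conf being the
 * application's struct rte_eth_rss_conf): an RSS action spreading matched
 * traffic over queues 0-3. In this API revision the queue array is a
 * flexible member of struct rte_flow_action_rss, so storage is provided
 * behind it with the same union trick mlx5_ctrl_flow_vlan() uses below.
 *
 *	union {
 *		struct rte_flow_action_rss rss;
 *		struct {
 *			const struct rte_eth_rss_conf *rss_conf;
 *			uint16_t num;
 *			uint16_t queue[4];
 *		} local;
 *	} conf = {
 *		.local = {
 *			.rss_conf = &rss_conf,
 *			.num = 4,
 *			.queue = { 0, 1, 2, 3 },
 *		},
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &conf.rss },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */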
1751
1752 /**
1753  * Complete flow rule creation.
1754  *
1755  * @param priv
1756  *   Pointer to private structure.
1757  * @param parser
1758  *   Internal parser structure.
1759  * @param flow
1760  *   Pointer to the rte_flow.
1761  * @param[out] error
1762  *   Perform verbose error reporting if not NULL.
1763  *
1764  * @return
1765  *   0 on success, an errno value otherwise and rte_errno is set.
1766  */
1767 static int
1768 priv_flow_create_action_queue(struct priv *priv,
1769                               struct mlx5_flow_parse *parser,
1770                               struct rte_flow *flow,
1771                               struct rte_flow_error *error)
1772 {
1773         int err = 0;
1774         unsigned int i;
1775
1776         assert(priv->pd);
1777         assert(priv->ctx);
1778         assert(!parser->drop);
1779         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1780         if (err)
1781                 goto error;
1782         if (parser->count)
1783                 flow->cs = parser->cs;
1784         if (!priv->dev->data->dev_started)
1785                 return 0;
1786         for (i = 0; i != hash_rxq_init_n; ++i) {
1787                 if (!flow->frxq[i].hrxq)
1788                         continue;
1789                 flow->frxq[i].ibv_flow =
1790                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1791                                                flow->frxq[i].ibv_attr);
1792                 if (!flow->frxq[i].ibv_flow) {
1793                         rte_flow_error_set(error, ENOMEM,
1794                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1795                                            NULL, "flow rule creation failure");
1796                         err = ENOMEM;
1797                         goto error;
1798                 }
1799                 DEBUG("%p type %d QP %p ibv_flow %p",
1800                       (void *)flow, i,
1801                       (void *)flow->frxq[i].hrxq,
1802                       (void *)flow->frxq[i].ibv_flow);
1803         }
1804         for (i = 0; i != parser->queues_n; ++i) {
1805                 struct mlx5_rxq_data *q =
1806                         (*priv->rxqs)[parser->queues[i]];
1807
1808                 q->mark |= parser->mark;
1809         }
1810         return 0;
1811 error:
1812         assert(flow);
1813         for (i = 0; i != hash_rxq_init_n; ++i) {
1814                 if (flow->frxq[i].ibv_flow) {
1815                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1816
1817                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1818                 }
1819                 if (flow->frxq[i].hrxq)
1820                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1821                 if (flow->frxq[i].ibv_attr)
1822                         rte_free(flow->frxq[i].ibv_attr);
1823         }
1824         if (flow->cs) {
1825                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1826                 flow->cs = NULL;
1827                 parser->cs = NULL;
1828         }
1829         return err;
1830 }
1831
1832 /**
1833  * Convert a flow.
1834  *
1835  * @param priv
1836  *   Pointer to private structure.
1837  * @param list
1838  *   Pointer to a TAILQ flow list.
1839  * @param[in] attr
1840  *   Flow rule attributes.
1841  * @param[in] pattern
1842  *   Pattern specification (list terminated by the END pattern item).
1843  * @param[in] actions
1844  *   Associated actions (list terminated by the END action).
1845  * @param[out] error
1846  *   Perform verbose error reporting if not NULL.
1847  *
1848  * @return
1849  *   A flow on success, NULL otherwise.
1850  */
1851 static struct rte_flow *
1852 priv_flow_create(struct priv *priv,
1853                  struct mlx5_flows *list,
1854                  const struct rte_flow_attr *attr,
1855                  const struct rte_flow_item items[],
1856                  const struct rte_flow_action actions[],
1857                  struct rte_flow_error *error)
1858 {
1859         struct mlx5_flow_parse parser = { .create = 1, };
1860         struct rte_flow *flow = NULL;
1861         unsigned int i;
1862         int err;
1863
1864         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1865         if (err)
1866                 goto exit;
1867         flow = rte_calloc(__func__, 1,
1868                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1869                           0);
1870         if (!flow) {
1871                 rte_flow_error_set(error, ENOMEM,
1872                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1873                                    NULL,
1874                                    "cannot allocate flow memory");
1875                 return NULL;
1876         }
1877         /* Copy queues configuration. */
1878         flow->queues = (uint16_t (*)[])(flow + 1);
1879         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1880         flow->queues_n = parser.queues_n;
1881         flow->mark = parser.mark;
1882         /* Copy RSS configuration. */
1883         flow->rss_conf = parser.rss_conf;
1884         flow->rss_conf.rss_key = flow->rss_key;
1885         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1886         /* Finalize the flow. */
1887         if (parser.drop)
1888                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1889                                                          error);
1890         else
1891                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1892         if (err)
1893                 goto exit;
1894         TAILQ_INSERT_TAIL(list, flow, next);
1895         DEBUG("Flow created %p", (void *)flow);
1896         return flow;
1897 exit:
1898         for (i = 0; i != hash_rxq_init_n; ++i) {
1899                 if (parser.queue[i].ibv_attr)
1900                         rte_free(parser.queue[i].ibv_attr);
1901         }
1902         rte_free(flow);
1903         return NULL;
1904 }
1905
1906 /**
1907  * Validate a flow supported by the NIC.
1908  *
1909  * @see rte_flow_validate()
1910  * @see rte_flow_ops
1911  */
1912 int
1913 mlx5_flow_validate(struct rte_eth_dev *dev,
1914                    const struct rte_flow_attr *attr,
1915                    const struct rte_flow_item items[],
1916                    const struct rte_flow_action actions[],
1917                    struct rte_flow_error *error)
1918 {
1919         struct priv *priv = dev->data->dev_private;
1920         int ret;
1921         struct mlx5_flow_parse parser = { .create = 0, };
1922
1923         priv_lock(priv);
1924         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1925         priv_unlock(priv);
1926         return ret;
1927 }
1928
1929 /**
1930  * Create a flow.
1931  *
1932  * @see rte_flow_create()
1933  * @see rte_flow_ops
1934  */
1935 struct rte_flow *
1936 mlx5_flow_create(struct rte_eth_dev *dev,
1937                  const struct rte_flow_attr *attr,
1938                  const struct rte_flow_item items[],
1939                  const struct rte_flow_action actions[],
1940                  struct rte_flow_error *error)
1941 {
1942         struct priv *priv = dev->data->dev_private;
1943         struct rte_flow *flow;
1944
1945         priv_lock(priv);
1946         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1947                                 error);
1948         priv_unlock(priv);
1949         return flow;
1950 }
1951
1952 /**
1953  * Destroy a flow.
1954  *
1955  * @param priv
1956  *   Pointer to private structure.
1957  * @param list
1958  *   Pointer to a TAILQ flow list.
1959  * @param[in] flow
1960  *   Flow to destroy.
1961  */
1962 static void
1963 priv_flow_destroy(struct priv *priv,
1964                   struct mlx5_flows *list,
1965                   struct rte_flow *flow)
1966 {
1967         unsigned int i;
1968
1969         if (flow->drop || !flow->mark)
1970                 goto free;
1971         for (i = 0; i != flow->queues_n; ++i) {
1972                 struct rte_flow *tmp;
1973                 int mark = 0;
1974
1975                 /*
1976                  * To remove the mark from the queue, the queue must not be
1977                  * present in any other marked flow (RSS or not).
1978                  */
1979                 TAILQ_FOREACH(tmp, list, next) {
1980                         unsigned int j;
1981                         uint16_t *tqs = NULL;
1982                         uint16_t tq_n = 0;
1983
1984                         if (!tmp->mark)
1985                                 continue;
1986                         for (j = 0; j != hash_rxq_init_n; ++j) {
1987                                 if (!tmp->frxq[j].hrxq)
1988                                         continue;
1989                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1990                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1991                         }
1992                         if (!tq_n)
1993                                 continue;
1994                         for (j = 0; (j != tq_n) && !mark; j++)
1995                                 if (tqs[j] == (*flow->queues)[i])
1996                                         mark = 1;
1997                 }
1998                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1999         }
2000 free:
2001         if (flow->drop) {
2002                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2003                         claim_zero(mlx5_glue->destroy_flow
2004                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2005                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2006         } else {
2007                 for (i = 0; i != hash_rxq_init_n; ++i) {
2008                         struct mlx5_flow *frxq = &flow->frxq[i];
2009
2010                         if (frxq->ibv_flow)
2011                                 claim_zero(mlx5_glue->destroy_flow
2012                                            (frxq->ibv_flow));
2013                         if (frxq->hrxq)
2014                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2015                         if (frxq->ibv_attr)
2016                                 rte_free(frxq->ibv_attr);
2017                 }
2018         }
2019         if (flow->cs) {
2020                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2021                 flow->cs = NULL;
2022         }
2023         TAILQ_REMOVE(list, flow, next);
2024         DEBUG("Flow destroyed %p", (void *)flow);
2025         rte_free(flow);
2026 }
2027
2028 /**
2029  * Destroy all flows.
2030  *
2031  * @param priv
2032  *   Pointer to private structure.
2033  * @param list
2034  *   Pointer to a TAILQ flow list.
2035  */
2036 void
2037 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2038 {
2039         while (!TAILQ_EMPTY(list)) {
2040                 struct rte_flow *flow;
2041
2042                 flow = TAILQ_FIRST(list);
2043                 priv_flow_destroy(priv, list, flow);
2044         }
2045 }
2046
2047 /**
2048  * Create drop queue.
2049  *
2050  * @param priv
2051  *   Pointer to private structure.
2052  *
2053  * @return
2054  *   0 on success, -1 on failure.
2055  */
2056 int
2057 priv_flow_create_drop_queue(struct priv *priv)
2058 {
2059         struct mlx5_hrxq_drop *fdq = NULL;
2060
2061         assert(priv->pd);
2062         assert(priv->ctx);
2063         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2064         if (!fdq) {
2065                 WARN("cannot allocate memory for drop queue");
2066                 goto error;
2067         }
2068         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2069         if (!fdq->cq) {
2070                 WARN("cannot allocate CQ for drop queue");
2071                 goto error;
2072         }
2073         fdq->wq = mlx5_glue->create_wq
2074                 (priv->ctx,
2075                  &(struct ibv_wq_init_attr){
2076                         .wq_type = IBV_WQT_RQ,
2077                         .max_wr = 1,
2078                         .max_sge = 1,
2079                         .pd = priv->pd,
2080                         .cq = fdq->cq,
2081                  });
2082         if (!fdq->wq) {
2083                 WARN("cannot allocate WQ for drop queue");
2084                 goto error;
2085         }
2086         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2087                 (priv->ctx,
2088                  &(struct ibv_rwq_ind_table_init_attr){
2089                         .log_ind_tbl_size = 0,
2090                         .ind_tbl = &fdq->wq,
2091                         .comp_mask = 0,
2092                  });
2093         if (!fdq->ind_table) {
2094                 WARN("cannot allocate indirection table for drop queue");
2095                 goto error;
2096         }
2097         fdq->qp = mlx5_glue->create_qp_ex
2098                 (priv->ctx,
2099                  &(struct ibv_qp_init_attr_ex){
2100                         .qp_type = IBV_QPT_RAW_PACKET,
2101                         .comp_mask =
2102                                 IBV_QP_INIT_ATTR_PD |
2103                                 IBV_QP_INIT_ATTR_IND_TABLE |
2104                                 IBV_QP_INIT_ATTR_RX_HASH,
2105                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2106                                 .rx_hash_function =
2107                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2108                                 .rx_hash_key_len = rss_hash_default_key_len,
2109                                 .rx_hash_key = rss_hash_default_key,
2110                                 .rx_hash_fields_mask = 0,
2111                                 },
2112                         .rwq_ind_tbl = fdq->ind_table,
2113                         .pd = priv->pd
2114                  });
2115         if (!fdq->qp) {
2116                 WARN("cannot allocate QP for drop queue");
2117                 goto error;
2118         }
2119         priv->flow_drop_queue = fdq;
2120         return 0;
2121 error: /* fdq may still be NULL if its allocation failed above. */
2122         if (fdq && fdq->qp)
2123                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2124         if (fdq && fdq->ind_table)
2125                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2126         if (fdq && fdq->wq)
2127                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2128         if (fdq && fdq->cq)
2129                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2130         if (fdq)
2131                 rte_free(fdq);
2132         priv->flow_drop_queue = NULL;
2133         return -1;
2134 }
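
/*
 * Design note: Verbs flow rules must be attached to a QP, so even rules
 * carrying IBV_FLOW_SPEC_ACTION_DROP need the placeholder receive path
 * built above (a CQ, a single-entry WQ, a one-entry indirection table and
 * a hashed QP with an empty rx_hash_fields_mask). No receive buffer is
 * ever posted on this WQ, so nothing can actually be delivered through it.
 */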
2135
2136 /**
2137  * Delete drop queue.
2138  *
2139  * @param priv
2140  *   Pointer to private structure.
2141  */
2142 void
2143 priv_flow_delete_drop_queue(struct priv *priv)
2144 {
2145         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2146
2147         if (!fdq)
2148                 return;
2149         if (fdq->qp)
2150                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2151         if (fdq->ind_table)
2152                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2153         if (fdq->wq)
2154                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2155         if (fdq->cq)
2156                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2157         rte_free(fdq);
2158         priv->flow_drop_queue = NULL;
2159 }
2160
2161 /**
2162  * Remove all flows.
2163  *
2164  * @param priv
2165  *   Pointer to private structure.
2166  * @param list
2167  *   Pointer to a TAILQ flow list.
2168  */
2169 void
2170 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2171 {
2172         struct rte_flow *flow;
2173
2174         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2175                 unsigned int i;
2176
2177                 if (flow->drop) {
2178                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2179                                 continue;
2180                         claim_zero(mlx5_glue->destroy_flow
2181                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2182                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2183                         /* Next flow. */
2184                         continue;
2185                 }
2186                 if (flow->mark) {
2187                         struct mlx5_ind_table_ibv *ind_tbl = NULL;
2188
2189                         for (i = 0; i != hash_rxq_init_n; ++i) {
2190                                 if (!flow->frxq[i].hrxq)
2191                                         continue;
2192                                 ind_tbl = flow->frxq[i].hrxq->ind_table;
2193                         }
2194                         assert(ind_tbl);
2195                         for (i = 0; i != ind_tbl->queues_n; ++i)
2196                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2197                 }
2198                 for (i = 0; i != hash_rxq_init_n; ++i) {
2199                         if (!flow->frxq[i].ibv_flow)
2200                                 continue;
2201                         claim_zero(mlx5_glue->destroy_flow
2202                                    (flow->frxq[i].ibv_flow));
2203                         flow->frxq[i].ibv_flow = NULL;
2204                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2205                         flow->frxq[i].hrxq = NULL;
2206                 }
2207                 DEBUG("Flow %p removed", (void *)flow);
2208         }
2209 }
2210
2211 /**
2212  * Add all flows.
2213  *
2214  * @param priv
2215  *   Pointer to private structure.
2216  * @param list
2217  *   Pointer to a TAILQ flow list.
2218  *
2219  * @return
2220  *   0 on success, an errno value otherwise and rte_errno is set.
2221  */
2222 int
2223 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2224 {
2225         struct rte_flow *flow;
2226
2227         TAILQ_FOREACH(flow, list, next) {
2228                 unsigned int i;
2229
2230                 if (flow->drop) {
2231                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2232                                 mlx5_glue->create_flow
2233                                 (priv->flow_drop_queue->qp,
2234                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2235                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2236                                 DEBUG("Flow %p cannot be applied",
2237                                       (void *)flow);
2238                                 rte_errno = EINVAL;
2239                                 return rte_errno;
2240                         }
2241                         DEBUG("Flow %p applied", (void *)flow);
2242                         /* Next flow. */
2243                         continue;
2244                 }
2245                 for (i = 0; i != hash_rxq_init_n; ++i) {
2246                         if (!flow->frxq[i].ibv_attr)
2247                                 continue;
2248                         flow->frxq[i].hrxq =
2249                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2250                                                    flow->rss_conf.rss_key_len,
2251                                                    hash_rxq_init[i].hash_fields,
2252                                                    (*flow->queues),
2253                                                    flow->queues_n);
2254                         if (flow->frxq[i].hrxq)
2255                                 goto flow_create;
2256                         flow->frxq[i].hrxq =
2257                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2258                                                    flow->rss_conf.rss_key_len,
2259                                                    hash_rxq_init[i].hash_fields,
2260                                                    (*flow->queues),
2261                                                    flow->queues_n);
2262                         if (!flow->frxq[i].hrxq) {
2263                                 DEBUG("Flow %p cannot be applied",
2264                                       (void *)flow);
2265                                 rte_errno = EINVAL;
2266                                 return rte_errno;
2267                         }
2268 flow_create:
2269                         flow->frxq[i].ibv_flow =
2270                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2271                                                        flow->frxq[i].ibv_attr);
2272                         if (!flow->frxq[i].ibv_flow) {
2273                                 DEBUG("Flow %p cannot be applied",
2274                                       (void *)flow);
2275                                 rte_errno = EINVAL;
2276                                 return rte_errno;
2277                         }
2278                         DEBUG("Flow %p applied", (void *)flow);
2279                 }
2280                 if (!flow->mark)
2281                         continue;
2282                 for (i = 0; i != flow->queues_n; ++i)
2283                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2284         }
2285         return 0;
2286 }
2287
2288 /**
2289  * Verify the flow list is empty.
2290  *
2291  * @param priv
2292  *   Pointer to private structure.
2293  *
2294  * @return The number of flows not released.
2295  */
2296 int
2297 priv_flow_verify(struct priv *priv)
2298 {
2299         struct rte_flow *flow;
2300         int ret = 0;
2301
2302         TAILQ_FOREACH(flow, &priv->flows, next) {
2303                 DEBUG("%p: flow %p still referenced", (void *)priv,
2304                       (void *)flow);
2305                 ++ret;
2306         }
2307         return ret;
2308 }
2309
2310 /**
2311  * Enable a control flow configured from the control plane.
2312  *
2313  * @param dev
2314  *   Pointer to Ethernet device.
2315  * @param eth_spec
2316  *   An Ethernet flow spec to apply.
2317  * @param eth_mask
2318  *   An Ethernet flow mask to apply.
2319  * @param vlan_spec
2320  *   A VLAN flow spec to apply.
2321  * @param vlan_mask
2322  *   A VLAN flow mask to apply.
2323  *
2324  * @return
2325  *   0 on success, errno value on failure.
2326  */
2327 int
2328 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2329                     struct rte_flow_item_eth *eth_spec,
2330                     struct rte_flow_item_eth *eth_mask,
2331                     struct rte_flow_item_vlan *vlan_spec,
2332                     struct rte_flow_item_vlan *vlan_mask)
2333 {
2334         struct priv *priv = dev->data->dev_private;
2335         const struct rte_flow_attr attr = {
2336                 .ingress = 1,
2337                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2338         };
2339         struct rte_flow_item items[] = {
2340                 {
2341                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2342                         .spec = eth_spec,
2343                         .last = NULL,
2344                         .mask = eth_mask,
2345                 },
2346                 {
2347                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2348                                 RTE_FLOW_ITEM_TYPE_END,
2349                         .spec = vlan_spec,
2350                         .last = NULL,
2351                         .mask = vlan_mask,
2352                 },
2353                 {
2354                         .type = RTE_FLOW_ITEM_TYPE_END,
2355                 },
2356         };
2357         struct rte_flow_action actions[] = {
2358                 {
2359                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2360                 },
2361                 {
2362                         .type = RTE_FLOW_ACTION_TYPE_END,
2363                 },
2364         };
2365         struct rte_flow *flow;
2366         struct rte_flow_error error;
2367         unsigned int i;
2368         union {
2369                 struct rte_flow_action_rss rss;
2370                 struct {
2371                         const struct rte_eth_rss_conf *rss_conf;
2372                         uint16_t num;
2373                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2374                 } local;
2375         } action_rss;
2376
2377         if (!priv->reta_idx_n)
2378                 return EINVAL;
2379         for (i = 0; i != priv->reta_idx_n; ++i)
2380                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2381         action_rss.local.rss_conf = &priv->rss_conf;
2382         action_rss.local.num = priv->reta_idx_n;
2383         actions[0].conf = (const void *)&action_rss.rss;
2384         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2385                                 &error);
2386         if (!flow)
2387                 return rte_errno;
2388         return 0;
2389 }
2390
2391 /**
2392  * Enable a control flow configured from the control plane.
2393  *
2394  * @param dev
2395  *   Pointer to Ethernet device.
2396  * @param eth_spec
2397  *   An Ethernet flow spec to apply.
2398  * @param eth_mask
2399  *   An Ethernet flow mask to apply.
2400  *
2401  * @return
2402  *   0 on success, errno value on failure.
2403  */
2404 int
2405 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2406                struct rte_flow_item_eth *eth_spec,
2407                struct rte_flow_item_eth *eth_mask)
2408 {
2409         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2410 }
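
/*
 * Usage sketch (hypothetical caller, in the spirit of the traffic
 * restoration code in mlx5_trigger.c): enabling broadcast reception
 * through a control flow, passing the same structure as spec and mask to
 * request an exact match.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
 */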
2411
2412 /**
2413  * Destroy a flow.
2414  *
2415  * @see rte_flow_destroy()
2416  * @see rte_flow_ops
2417  */
2418 int
2419 mlx5_flow_destroy(struct rte_eth_dev *dev,
2420                   struct rte_flow *flow,
2421                   struct rte_flow_error *error)
2422 {
2423         struct priv *priv = dev->data->dev_private;
2424
2425         (void)error;
2426         priv_lock(priv);
2427         priv_flow_destroy(priv, &priv->flows, flow);
2428         priv_unlock(priv);
2429         return 0;
2430 }
2431
2432 /**
2433  * Destroy all flows.
2434  *
2435  * @see rte_flow_flush()
2436  * @see rte_flow_ops
2437  */
2438 int
2439 mlx5_flow_flush(struct rte_eth_dev *dev,
2440                 struct rte_flow_error *error)
2441 {
2442         struct priv *priv = dev->data->dev_private;
2443
2444         (void)error;
2445         priv_lock(priv);
2446         priv_flow_flush(priv, &priv->flows);
2447         priv_unlock(priv);
2448         return 0;
2449 }
2450
2451 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2452 /**
2453  * Query flow counter.
2454  *
2455  * @param cs
2456  *   The counter set to query.
2457  * @param counter_stats
2458  *   Stored statistics the returned values are computed against.
2459  *
2460  * @return
2461  *   0 on success, an errno value otherwise and rte_errno is set.
2462  */
2463 static int
2464 priv_flow_query_count(struct ibv_counter_set *cs,
2465                       struct mlx5_flow_counter_stats *counter_stats,
2466                       struct rte_flow_query_count *query_count,
2467                       struct rte_flow_error *error)
2468 {
2469         uint64_t counters[2];
2470         struct ibv_query_counter_set_attr query_cs_attr = {
2471                 .cs = cs,
2472                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2473         };
2474         struct ibv_counter_set_data query_out = {
2475                 .out = counters,
2476                 .outlen = 2 * sizeof(uint64_t),
2477         };
2478         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2479
2480         if (res) {
2481                 rte_flow_error_set(error, -res,
2482                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2483                                    NULL,
2484                                    "cannot read counter");
2485                 return -res;
2486         }
2487         query_count->hits_set = 1;
2488         query_count->bytes_set = 1;
2489         query_count->hits = counters[0] - counter_stats->hits;
2490         query_count->bytes = counters[1] - counter_stats->bytes;
2491         if (query_count->reset) {
2492                 counter_stats->hits = counters[0];
2493                 counter_stats->bytes = counters[1];
2494         }
2495         return 0;
2496 }
2497
2498 /**
2499  * Query a flow.
2500  *
2501  * @see rte_flow_query()
2502  * @see rte_flow_ops
2503  */
2504 int
2505 mlx5_flow_query(struct rte_eth_dev *dev,
2506                 struct rte_flow *flow,
2507                 enum rte_flow_action_type action __rte_unused,
2508                 void *data,
2509                 struct rte_flow_error *error)
2510 {
2511         struct priv *priv = dev->data->dev_private;
2512         int res = EINVAL;
2513
2514         priv_lock(priv);
2515         if (flow->cs) {
2516                 res = priv_flow_query_count(flow->cs,
2517                                         &flow->counter_stats,
2518                                         (struct rte_flow_query_count *)data,
2519                                         error);
2520         } else {
2521                 rte_flow_error_set(error, res,
2522                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2523                                    NULL,
2524                                    "no counter found for flow");
2525         }
2526         priv_unlock(priv);
2527         return -res;
2528 }
2529 #endif
2530
2531 /**
2532  * Isolated mode.
2533  *
2534  * @see rte_flow_isolate()
2535  * @see rte_flow_ops
2536  */
2537 int
2538 mlx5_flow_isolate(struct rte_eth_dev *dev,
2539                   int enable,
2540                   struct rte_flow_error *error)
2541 {
2542         struct priv *priv = dev->data->dev_private;
2543
2544         priv_lock(priv);
2545         if (dev->data->dev_started) {
2546                 rte_flow_error_set(error, EBUSY,
2547                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2548                                    NULL,
2549                                    "port must be stopped first");
2550                 priv_unlock(priv);
2551                 return -rte_errno;
2552         }
2553         priv->isolated = !!enable;
2554         if (enable)
2555                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2556         else
2557                 priv->dev->dev_ops = &mlx5_dev_ops;
2558         priv_unlock(priv);
2559         return 0;
2560 }
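
/*
 * Usage sketch (hypothetical application code): per the dev_started check
 * above, isolated mode must be selected while the port is stopped,
 * typically before the initial rte_eth_dev_start().
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot isolate: %s\n",
 *		       error.message ? error.message : "unknown");
 *	rte_eth_dev_start(port_id);
 */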
2561
2562 /**
2563  * Convert a flow director filter to a generic flow.
2564  *
2565  * @param priv
2566  *   Private structure.
2567  * @param fdir_filter
2568  *   Flow director filter to add.
2569  * @param attributes
2570  *   Generic flow parameters structure.
2571  *
2572  * @return
2573  *   0 on success, errno value on error.
2574  */
2575 static int
2576 priv_fdir_filter_convert(struct priv *priv,
2577                          const struct rte_eth_fdir_filter *fdir_filter,
2578                          struct mlx5_fdir *attributes)
2579 {
2580         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2581
2582         /* Validate queue number. */
2583         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2584                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2585                 return EINVAL;
2586         }
2587         attributes->attr.ingress = 1;
2588         attributes->items[0] = (struct rte_flow_item) {
2589                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2590                 .spec = &attributes->l2,
2591                 .mask = &attributes->l2_mask,
2592         };
2593         switch (fdir_filter->action.behavior) {
2594         case RTE_ETH_FDIR_ACCEPT:
2595                 attributes->actions[0] = (struct rte_flow_action){
2596                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2597                         .conf = &attributes->queue,
2598                 };
2599                 break;
2600         case RTE_ETH_FDIR_REJECT:
2601                 attributes->actions[0] = (struct rte_flow_action){
2602                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2603                 };
2604                 break;
2605         default:
2606                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2607                 return ENOTSUP;
2608         }
2609         attributes->queue.index = fdir_filter->action.rx_queue;
2610         switch (fdir_filter->input.flow_type) {
2611         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2612                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2613                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2614                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2615                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2616                         .type_of_service = input->flow.udp4_flow.ip.tos,
2617                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2618                 };
2619                 attributes->l4.udp.hdr = (struct udp_hdr){
2620                         .src_port = input->flow.udp4_flow.src_port,
2621                         .dst_port = input->flow.udp4_flow.dst_port,
2622                 };
2623                 attributes->items[1] = (struct rte_flow_item){
2624                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2625                         .spec = &attributes->l3,
2626                         .mask = &attributes->l3,
2627                 };
2628                 attributes->items[2] = (struct rte_flow_item){
2629                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2630                         .spec = &attributes->l4,
2631                         .mask = &attributes->l4,
2632                 };
2633                 break;
2634         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2635                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2636                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2637                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2638                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2639                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2640                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2641                 };
2642                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2643                         .src_port = input->flow.tcp4_flow.src_port,
2644                         .dst_port = input->flow.tcp4_flow.dst_port,
2645                 };
2646                 attributes->items[1] = (struct rte_flow_item){
2647                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2648                         .spec = &attributes->l3,
2649                         .mask = &attributes->l3,
2650                 };
2651                 attributes->items[2] = (struct rte_flow_item){
2652                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2653                         .spec = &attributes->l4,
2654                         .mask = &attributes->l4,
2655                 };
2656                 break;
2657         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2658                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2659                         .src_addr = input->flow.ip4_flow.src_ip,
2660                         .dst_addr = input->flow.ip4_flow.dst_ip,
2661                         .time_to_live = input->flow.ip4_flow.ttl,
2662                         .type_of_service = input->flow.ip4_flow.tos,
2663                         .next_proto_id = input->flow.ip4_flow.proto,
2664                 };
2665                 attributes->items[1] = (struct rte_flow_item){
2666                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2667                         .spec = &attributes->l3,
2668                         .mask = &attributes->l3,
2669                 };
2670                 break;
2671         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2672                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2673                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2674                         .proto = input->flow.udp6_flow.ip.proto,
2675                 };
2676                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2677                        input->flow.udp6_flow.ip.src_ip,
2678                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2679                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2680                        input->flow.udp6_flow.ip.dst_ip,
2681                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2682                 attributes->l4.udp.hdr = (struct udp_hdr){
2683                         .src_port = input->flow.udp6_flow.src_port,
2684                         .dst_port = input->flow.udp6_flow.dst_port,
2685                 };
2686                 attributes->items[1] = (struct rte_flow_item){
2687                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2688                         .spec = &attributes->l3,
2689                         .mask = &attributes->l3,
2690                 };
2691                 attributes->items[2] = (struct rte_flow_item){
2692                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2693                         .spec = &attributes->l4,
2694                         .mask = &attributes->l4,
2695                 };
2696                 break;
2697         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2698                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2699                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2700                         .proto = input->flow.tcp6_flow.ip.proto,
2701                 };
2702                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2703                        input->flow.tcp6_flow.ip.src_ip,
2704                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2705                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2706                        input->flow.tcp6_flow.ip.dst_ip,
2707                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2708                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2709                         .src_port = input->flow.tcp6_flow.src_port,
2710                         .dst_port = input->flow.tcp6_flow.dst_port,
2711                 };
2712                 attributes->items[1] = (struct rte_flow_item){
2713                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2714                         .spec = &attributes->l3,
2715                         .mask = &attributes->l3,
2716                 };
2717                 attributes->items[2] = (struct rte_flow_item){
2718                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2719                         .spec = &attributes->l4,
2720                         .mask = &attributes->l4,
2721                 };
2722                 break;
2723         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2724                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2725                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2726                         .proto = input->flow.ipv6_flow.proto,
2727                 };
2728                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2729                        input->flow.ipv6_flow.src_ip,
2730                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2731                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2732                        input->flow.ipv6_flow.dst_ip,
2733                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2734                 attributes->items[1] = (struct rte_flow_item){
2735                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2736                         .spec = &attributes->l3,
2737                         .mask = &attributes->l3,
2738                 };
2739                 break;
2740         default:
2741                 ERROR("invalid flow type %d",
2742                       fdir_filter->input.flow_type);
2743                 return ENOTSUP;
2744         }
2745         return 0;
2746 }
2747
2748 /**
2750  * Add a new flow director filter and store it in the list.
2750  *
2751  * @param priv
2752  *   Private structure.
2753  * @param fdir_filter
2754  *   Flow director filter to add.
2755  *
2756  * @return
2757  *   0 on success, errno value on failure.
2758  */
2759 static int
2760 priv_fdir_filter_add(struct priv *priv,
2761                      const struct rte_eth_fdir_filter *fdir_filter)
2762 {
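             /*
              * Use an all-zero Ethernet mask: L2 is fully wildcarded so the
              * rule only matches on the L3/L4 fields filled in by
              * priv_fdir_filter_convert().
              */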
2763         struct mlx5_fdir attributes = {
2764                 .attr.group = 0,
2765                 .l2_mask = {
2766                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2767                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2768                         .type = 0,
2769                 },
2770         };
2771         struct mlx5_flow_parse parser = {
2772                 .layer = HASH_RXQ_ETH,
2773         };
2774         struct rte_flow_error error;
2775         struct rte_flow *flow;
2776         int ret;
2777
2778         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2779         if (ret)
2780                 return -ret;
2781         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2782                                 attributes.actions, &error, &parser);
2783         if (ret)
2784                 return -ret;
2785         flow = priv_flow_create(priv,
2786                                 &priv->flows,
2787                                 &attributes.attr,
2788                                 attributes.items,
2789                                 attributes.actions,
2790                                 &error);
2791         if (flow) {
2792                 DEBUG("FDIR created %p", (void *)flow);
2793                 return 0;
2794         }
2795         return ENOTSUP;
2796 }
2797
2798 /**
2799  * Delete a specific filter.
2800  *
2801  * @param priv
2802  *   Private structure.
2803  * @param fdir_filter
2804  *   Filter to be deleted.
2805  *
2806  * @return
2807  *   0 on success, errno value on failure.
2808  */
2809 static int
2810 priv_fdir_filter_delete(struct priv *priv,
2811                         const struct rte_eth_fdir_filter *fdir_filter)
2812 {
2813         struct mlx5_fdir attributes = {
2814                 .attr.group = 0,
2815         };
2816         struct mlx5_flow_parse parser = {
2817                 .create = 1,
2818                 .layer = HASH_RXQ_ETH,
2819         };
2820         struct rte_flow_error error;
2821         struct rte_flow *flow;
2822         unsigned int i;
2823         int ret;
2824
2825         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2826         if (ret)
2827                 return -ret;
2828         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2829                                 attributes.actions, &error, &parser);
2830         if (ret)
2831                 goto exit;
2832         /*
2833          * Special case for the drop action, which is only appended to the
2834          * specifications when a flow is created; at this point the drop
2835          * specification is therefore missing and must be added manually.
2836          */
2837         if (parser.drop) {
2838                 struct ibv_flow_spec_action_drop *drop;
2839
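                     /*
                      * Append the drop specification at the current end of
                      * the Verbs attribute so the comparison below sees the
                      * same layout as a flow created with the drop action.
                      */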
2840                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2841                                 parser.queue[HASH_RXQ_ETH].offset);
2842                 *drop = (struct ibv_flow_spec_action_drop){
2843                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2844                         .size = sizeof(struct ibv_flow_spec_action_drop),
2845                 };
2846                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2847         }
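             /*
              * Look for the flow to delete: compare the ibv_flow_attr
              * header first, then walk both spec buffers spec by spec
              * using each spec's size field, bailing out on the first
              * mismatch.
              */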
2848         TAILQ_FOREACH(flow, &priv->flows, next) {
2849                 struct ibv_flow_attr *attr;
2850                 struct ibv_spec_header *attr_h;
2851                 void *spec;
2852                 struct ibv_flow_attr *flow_attr;
2853                 struct ibv_spec_header *flow_h;
2854                 void *flow_spec;
2855                 unsigned int specs_n;
2856
2857                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2858                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2859                 /* Compare the attributes first. */
2860                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2861                         continue;
2862                 if (attr->num_of_specs == 0)
2863                         continue;
2864                 spec = (void *)((uintptr_t)attr +
2865                                 sizeof(struct ibv_flow_attr));
2866                 flow_spec = (void *)((uintptr_t)flow_attr +
2867                                      sizeof(struct ibv_flow_attr));
2868                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2869                 for (i = 0; i != specs_n; ++i) {
2870                         attr_h = spec;
2871                         flow_h = flow_spec;
2872                         if (memcmp(spec, flow_spec,
2873                                    RTE_MIN(attr_h->size, flow_h->size)))
2874                                 goto wrong_flow;
2875                         spec = (void *)((uintptr_t)spec + attr_h->size);
2876                         flow_spec = (void *)((uintptr_t)flow_spec +
2877                                              flow_h->size);
2878                 }
2879                 /* At this point, the flow matches. */
2880                 break;
2881 wrong_flow:
2882                 /* The flow does not match. */
2883                 continue;
2884         }
2885         if (flow)
2886                 priv_flow_destroy(priv, &priv->flows, flow);
2887 exit:
2888         for (i = 0; i != hash_rxq_init_n; ++i) {
2889                 if (parser.queue[i].ibv_attr)
2890                         rte_free(parser.queue[i].ibv_attr);
2891         }
2892         return -ret;
2893 }
2894
2895 /**
2896  * Update a specific flow director filter.
2897  *
2898  * @param priv
2899  *   Private structure.
2900  * @param fdir_filter
2901  *   Filter to be updated.
2902  *
2903  * @return
2904  *   0 on success, errno value on failure.
2905  */
2906 static int
2907 priv_fdir_filter_update(struct priv *priv,
2908                         const struct rte_eth_fdir_filter *fdir_filter)
2909 {
2910         int ret;
2911
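             /* An update is implemented as a delete followed by an add. */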
2912         ret = priv_fdir_filter_delete(priv, fdir_filter);
2913         if (ret)
2914                 return ret;
2915         ret = priv_fdir_filter_add(priv, fdir_filter);
2916         return ret;
2917 }
2918
2919 /**
2920  * Flush all filters.
2921  *
2922  * @param priv
2923  *   Private structure.
2924  */
2925 static void
2926 priv_fdir_filter_flush(struct priv *priv)
2927 {
2928         priv_flow_flush(priv, &priv->flows);
2929 }
2930
2931 /**
2932  * Get flow director information.
2933  *
2934  * @param priv
2935  *   Private structure.
2936  * @param[out] fdir_info
2937  *   Resulting flow director information.
2938  */
2939 static void
2940 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2941 {
2942         struct rte_eth_fdir_masks *mask =
2943                 &priv->dev->data->dev_conf.fdir_conf.mask;
2944
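             /*
              * Flexible payload and guaranteed/best-effort space are not
              * supported, hence the zeroed capability fields below.
              */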
2945         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2946         fdir_info->guarant_spc = 0;
2947         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2948         fdir_info->max_flexpayload = 0;
2949         fdir_info->flow_types_mask[0] = 0;
2950         fdir_info->flex_payload_unit = 0;
2951         fdir_info->max_flex_payload_segment_num = 0;
2952         fdir_info->flex_payload_limit = 0;
2953         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2954 }
2955
2956 /**
2957  * Handle flow director filter operations.
2958  *
2959  * @param priv
2960  *   Pointer to private structure.
2961  * @param filter_op
2962  *   Operation to perform.
2963  * @param arg
2964  *   Pointer to operation-specific structure.
2965  *
2966  * @return
2967  *   0 on success, errno value on failure.
2968  */
2969 static int
2970 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2971 {
2972         enum rte_fdir_mode fdir_mode =
2973                 priv->dev->data->dev_conf.fdir_conf.mode;
2974         int ret = 0;
2975
2976         if (filter_op == RTE_ETH_FILTER_NOP)
2977                 return 0;
2978         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2979             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2980                 ERROR("%p: flow director mode %d not supported",
2981                       (void *)priv, fdir_mode);
2982                 return EINVAL;
2983         }
2984         switch (filter_op) {
2985         case RTE_ETH_FILTER_ADD:
2986                 ret = priv_fdir_filter_add(priv, arg);
2987                 break;
2988         case RTE_ETH_FILTER_UPDATE:
2989                 ret = priv_fdir_filter_update(priv, arg);
2990                 break;
2991         case RTE_ETH_FILTER_DELETE:
2992                 ret = priv_fdir_filter_delete(priv, arg);
2993                 break;
2994         case RTE_ETH_FILTER_FLUSH:
2995                 priv_fdir_filter_flush(priv);
2996                 break;
2997         case RTE_ETH_FILTER_INFO:
2998                 priv_fdir_info_get(priv, arg);
2999                 break;
3000         default:
3001                 DEBUG("%p: unknown operation %u", (void *)priv,
3002                       filter_op);
3003                 ret = EINVAL;
3004                 break;
3005         }
3006         return ret;
3007 }
3008
3009 /**
3010  * Manage filter operations.
3011  *
3012  * @param dev
3013  *   Pointer to Ethernet device structure.
3014  * @param filter_type
3015  *   Filter type.
3016  * @param filter_op
3017  *   Operation to perform.
3018  * @param arg
3019  *   Pointer to operation-specific structure.
3020  *
3021  * @return
3022  *   0 on success, negative errno value on failure.
3023  */
3024 int
3025 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3026                      enum rte_filter_type filter_type,
3027                      enum rte_filter_op filter_op,
3028                      void *arg)
3029 {
3030         int ret = EINVAL;
3031         struct priv *priv = dev->data->dev_private;
3032
3033         switch (filter_type) {
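             /* Generic filter type hands back the rte_flow operations. */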
3034         case RTE_ETH_FILTER_GENERIC:
3035                 if (filter_op != RTE_ETH_FILTER_GET)
3036                         return -EINVAL;
3037                 *(const void **)arg = &mlx5_flow_ops;
3038                 return 0;
3039         case RTE_ETH_FILTER_FDIR:
3040                 priv_lock(priv);
3041                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3042                 priv_unlock(priv);
3043                 break;
3044         default:
3045                 ERROR("%p: filter type (%d) not supported",
3046                       (void *)dev, filter_type);
3047                 break;
3048         }
3049         return -ret;
3050 }
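
/*
 * Minimal usage sketch (hypothetical application code, not part of this
 * driver): adding a perfect-match IPv4/UDP rule through the legacy filter
 * API ends up in priv_fdir_filter_add() above.  Port id, addresses, ports
 * and queue number are placeholders.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip = {
 *					.src_ip = rte_cpu_to_be_32(0x0a000001),
 *					.dst_ip = rte_cpu_to_be_32(0x0a000002),
 *				},
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &f);
 */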