net/mlx5: revert support of IPv4 time-to-live filter
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox.
4  */
5
6 #include <sys/queue.h>
7 #include <string.h>
8
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18
19 #include <rte_ethdev_driver.h>
20 #include <rte_flow.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
23 #include <rte_ip.h>
24
25 #include "mlx5.h"
26 #include "mlx5_defs.h"
27 #include "mlx5_prm.h"
28 #include "mlx5_glue.h"
29
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
32
33 /* Internet Protocol versions. */
34 #define MLX5_IPV4 4
35 #define MLX5_IPV6 6
36
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39         int dummy;
40 };
41 #endif
42
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
46
47 static int
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49                      const void *default_mask,
50                      void *data);
51
52 static int
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54                       const void *default_mask,
55                       void *data);
56
57 static int
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59                       const void *default_mask,
60                       void *data);
61
62 static int
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64                       const void *default_mask,
65                       void *data);
66
67 static int
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69                      const void *default_mask,
70                      void *data);
71
72 static int
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74                      const void *default_mask,
75                      void *data);
76
77 static int
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79                        const void *default_mask,
80                        void *data);
81
82 struct mlx5_flow_parse;
83
84 static void
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
86                       unsigned int size);
87
88 static int
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
90
91 static int
92 mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
93
94 /* Hash RX queue types. */
95 enum hash_rxq_type {
96         HASH_RXQ_TCPV4,
97         HASH_RXQ_UDPV4,
98         HASH_RXQ_IPV4,
99         HASH_RXQ_TCPV6,
100         HASH_RXQ_UDPV6,
101         HASH_RXQ_IPV6,
102         HASH_RXQ_ETH,
103 };
104
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107         uint64_t hash_fields; /* Fields that participate in the hash. */
108         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109         unsigned int flow_priority; /* Flow priority to use. */
110         unsigned int ip_version; /* Internet protocol. */
111 };
112
113 /* Initialization data for hash RX queues. */
114 const struct hash_rxq_init hash_rxq_init[] = {
115         [HASH_RXQ_TCPV4] = {
116                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117                                 IBV_RX_HASH_DST_IPV4 |
118                                 IBV_RX_HASH_SRC_PORT_TCP |
119                                 IBV_RX_HASH_DST_PORT_TCP),
120                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
121                 .flow_priority = 0,
122                 .ip_version = MLX5_IPV4,
123         },
124         [HASH_RXQ_UDPV4] = {
125                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126                                 IBV_RX_HASH_DST_IPV4 |
127                                 IBV_RX_HASH_SRC_PORT_UDP |
128                                 IBV_RX_HASH_DST_PORT_UDP),
129                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
130                 .flow_priority = 0,
131                 .ip_version = MLX5_IPV4,
132         },
133         [HASH_RXQ_IPV4] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135                                 IBV_RX_HASH_DST_IPV4),
136                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
137                                 ETH_RSS_FRAG_IPV4),
138                 .flow_priority = 1,
139                 .ip_version = MLX5_IPV4,
140         },
141         [HASH_RXQ_TCPV6] = {
142                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143                                 IBV_RX_HASH_DST_IPV6 |
144                                 IBV_RX_HASH_SRC_PORT_TCP |
145                                 IBV_RX_HASH_DST_PORT_TCP),
146                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
147                 .flow_priority = 0,
148                 .ip_version = MLX5_IPV6,
149         },
150         [HASH_RXQ_UDPV6] = {
151                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152                                 IBV_RX_HASH_DST_IPV6 |
153                                 IBV_RX_HASH_SRC_PORT_UDP |
154                                 IBV_RX_HASH_DST_PORT_UDP),
155                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
156                 .flow_priority = 0,
157                 .ip_version = MLX5_IPV6,
158         },
159         [HASH_RXQ_IPV6] = {
160                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161                                 IBV_RX_HASH_DST_IPV6),
162                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
163                                 ETH_RSS_FRAG_IPV6),
164                 .flow_priority = 1,
165                 .ip_version = MLX5_IPV6,
166         },
167         [HASH_RXQ_ETH] = {
168                 .hash_fields = 0,
169                 .dpdk_rss_hf = 0,
170                 .flow_priority = 2,
171         },
172 };
173
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
176
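/*
 * Illustrative sketch, not part of the driver: one way the table above
 * could be scanned to map a DPDK RSS hash field mask (ETH_RSS_*) to the
 * first matching hash Rx queue type. The helper name is made up for the
 * example.
 */
static inline enum hash_rxq_type
example_hash_rxq_type_for_rss_hf(uint64_t rss_hf)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
			return (enum hash_rxq_type)i;
	/* Fall back to the L2-only type when no L3/L4 field matches. */
	return HASH_RXQ_ETH;
}
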
177 /** Structure for holding counter stats. */
178 struct mlx5_flow_counter_stats {
179         uint64_t hits; /**< Number of packets matched by the rule. */
180         uint64_t bytes; /**< Number of bytes matched by the rule. */
181 };
182
183 /** Structure for Drop queue. */
184 struct mlx5_hrxq_drop {
185         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186         struct ibv_qp *qp; /**< Verbs queue pair. */
187         struct ibv_wq *wq; /**< Verbs work queue. */
188         struct ibv_cq *cq; /**< Verbs completion queue. */
189 };
190
191 /* Flows structures. */
192 struct mlx5_flow {
193         uint64_t hash_fields; /**< Fields that participate in the hash. */
194         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195         struct ibv_flow *ibv_flow; /**< Verbs flow. */
196         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
197 };
198
199 /* Drop flows structures. */
200 struct mlx5_flow_drop {
201         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202         struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 };
204
205 struct rte_flow {
206         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207         uint32_t mark:1; /**< Set if the flow is marked. */
208         uint32_t drop:1; /**< Drop queue. */
209         uint16_t queues_n; /**< Number of entries in queue[]. */
210         uint16_t (*queues)[]; /**< Queues indexes to use. */
211         struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
212         uint8_t rss_key[40]; /**< copy of the RSS key. */
213         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
215         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216         /**< Flow with Rx queue. */
217 };
218
219 /** Static initializer for items. */
220 #define ITEMS(...) \
221         (const enum rte_flow_item_type []){ \
222                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
223         }
224
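/*
 * For reference, ITEMS(RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_VXLAN)
 * expands to the compound literal
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_ETH,
 *           RTE_FLOW_ITEM_TYPE_VXLAN,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * i.e. an anonymous, END-terminated list of the item types allowed to
 * follow a given item in the graph below.
 */
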
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227         /** List of possible actions for these items. */
228         const enum rte_flow_action_type *const actions;
229         /** Bit-masks corresponding to the possibilities for the item. */
230         const void *mask;
231         /**
232          * Default bit-masks to use when item->mask is not provided. When
233          * \default_mask is also NULL, the full supported bit-mask (\mask) is
234          * used instead.
235          */
236         const void *default_mask;
237         /** Bit-masks size in bytes. */
238         const unsigned int mask_sz;
239         /**
240          * Conversion function from rte_flow to NIC specific flow.
241          *
242          * @param item
243          *   rte_flow item to convert.
244          * @param default_mask
245          *   Default bit-masks to use when item->mask is not provided.
246          * @param data
247          *   Internal structure to store the conversion.
248          *
249          * @return
250          *   0 on success, negative value otherwise.
251          */
252         int (*convert)(const struct rte_flow_item *item,
253                        const void *default_mask,
254                        void *data);
255         /** Size in bytes of the destination structure. */
256         const unsigned int dst_sz;
257         /** List of possible following items.  */
258         const enum rte_flow_item_type *const items;
259 };
260
261 /** Valid actions for this PMD. */
262 static const enum rte_flow_action_type valid_actions[] = {
263         RTE_FLOW_ACTION_TYPE_DROP,
264         RTE_FLOW_ACTION_TYPE_QUEUE,
265         RTE_FLOW_ACTION_TYPE_MARK,
266         RTE_FLOW_ACTION_TYPE_FLAG,
267 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
268         RTE_FLOW_ACTION_TYPE_COUNT,
269 #endif
270         RTE_FLOW_ACTION_TYPE_END,
271 };
272
273 /** Graph of supported items and associated actions. */
274 static const struct mlx5_flow_items mlx5_flow_items[] = {
275         [RTE_FLOW_ITEM_TYPE_END] = {
276                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
277                                RTE_FLOW_ITEM_TYPE_VXLAN),
278         },
279         [RTE_FLOW_ITEM_TYPE_ETH] = {
280                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
281                                RTE_FLOW_ITEM_TYPE_IPV4,
282                                RTE_FLOW_ITEM_TYPE_IPV6),
283                 .actions = valid_actions,
284                 .mask = &(const struct rte_flow_item_eth){
285                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
286                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287                         .type = -1,
288                 },
289                 .default_mask = &rte_flow_item_eth_mask,
290                 .mask_sz = sizeof(struct rte_flow_item_eth),
291                 .convert = mlx5_flow_create_eth,
292                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
293         },
294         [RTE_FLOW_ITEM_TYPE_VLAN] = {
295                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
296                                RTE_FLOW_ITEM_TYPE_IPV6),
297                 .actions = valid_actions,
298                 .mask = &(const struct rte_flow_item_vlan){
299                         .tci = -1,
300                 },
301                 .default_mask = &rte_flow_item_vlan_mask,
302                 .mask_sz = sizeof(struct rte_flow_item_vlan),
303                 .convert = mlx5_flow_create_vlan,
304                 .dst_sz = 0,
305         },
306         [RTE_FLOW_ITEM_TYPE_IPV4] = {
307                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
308                                RTE_FLOW_ITEM_TYPE_TCP),
309                 .actions = valid_actions,
310                 .mask = &(const struct rte_flow_item_ipv4){
311                         .hdr = {
312                                 .src_addr = -1,
313                                 .dst_addr = -1,
314                                 .type_of_service = -1,
315                                 .next_proto_id = -1,
316                         },
317                 },
318                 .default_mask = &rte_flow_item_ipv4_mask,
319                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
320                 .convert = mlx5_flow_create_ipv4,
321                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
322         },
323         [RTE_FLOW_ITEM_TYPE_IPV6] = {
324                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
325                                RTE_FLOW_ITEM_TYPE_TCP),
326                 .actions = valid_actions,
327                 .mask = &(const struct rte_flow_item_ipv6){
328                         .hdr = {
329                                 .src_addr = {
330                                         0xff, 0xff, 0xff, 0xff,
331                                         0xff, 0xff, 0xff, 0xff,
332                                         0xff, 0xff, 0xff, 0xff,
333                                         0xff, 0xff, 0xff, 0xff,
334                                 },
335                                 .dst_addr = {
336                                         0xff, 0xff, 0xff, 0xff,
337                                         0xff, 0xff, 0xff, 0xff,
338                                         0xff, 0xff, 0xff, 0xff,
339                                         0xff, 0xff, 0xff, 0xff,
340                                 },
341                                 .vtc_flow = -1,
342                                 .proto = -1,
343                                 .hop_limits = -1,
344                         },
345                 },
346                 .default_mask = &rte_flow_item_ipv6_mask,
347                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
348                 .convert = mlx5_flow_create_ipv6,
349                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
350         },
351         [RTE_FLOW_ITEM_TYPE_UDP] = {
352                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
353                 .actions = valid_actions,
354                 .mask = &(const struct rte_flow_item_udp){
355                         .hdr = {
356                                 .src_port = -1,
357                                 .dst_port = -1,
358                         },
359                 },
360                 .default_mask = &rte_flow_item_udp_mask,
361                 .mask_sz = sizeof(struct rte_flow_item_udp),
362                 .convert = mlx5_flow_create_udp,
363                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
364         },
365         [RTE_FLOW_ITEM_TYPE_TCP] = {
366                 .actions = valid_actions,
367                 .mask = &(const struct rte_flow_item_tcp){
368                         .hdr = {
369                                 .src_port = -1,
370                                 .dst_port = -1,
371                         },
372                 },
373                 .default_mask = &rte_flow_item_tcp_mask,
374                 .mask_sz = sizeof(struct rte_flow_item_tcp),
375                 .convert = mlx5_flow_create_tcp,
376                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
377         },
378         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
379                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
380                 .actions = valid_actions,
381                 .mask = &(const struct rte_flow_item_vxlan){
382                         .vni = "\xff\xff\xff",
383                 },
384                 .default_mask = &rte_flow_item_vxlan_mask,
385                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
386                 .convert = mlx5_flow_create_vxlan,
387                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
388         },
389 };
390
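/*
 * Illustrative pattern, not part of the driver: a walk through the graph
 * above (ETH -> IPV4 -> UDP) as an application could pass it to
 * rte_flow_validate() or rte_flow_create(). Items without spec/mask only
 * assert the presence of the protocol layer.
 */
static const struct rte_flow_item example_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
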
391 /** Structure to pass to the conversion function. */
392 struct mlx5_flow_parse {
393         uint32_t inner; /**< Set once VXLAN is encountered. */
394         uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
395         uint32_t create:1;
396         /**< Whether resources should remain after a validate. */
397         uint32_t drop:1; /**< Target is a drop queue. */
398         uint32_t mark:1; /**< Mark is present in the flow. */
399         uint32_t count:1; /**< Count is present in the flow. */
400         uint32_t mark_id; /**< Mark identifier. */
401         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
402         uint16_t queues_n; /**< Number of entries in queue[]. */
403         struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
404         uint8_t rss_key[40]; /**< copy of the RSS key. */
405         enum hash_rxq_type layer; /**< Last pattern layer detected. */
406         struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
407         struct {
408                 struct ibv_flow_attr *ibv_attr;
409                 /**< Pointer to Verbs attributes. */
410                 unsigned int offset;
411                 /**< Current position or total size of the attribute. */
412         } queue[RTE_DIM(hash_rxq_init)];
413 };
414
415 static const struct rte_flow_ops mlx5_flow_ops = {
416         .validate = mlx5_flow_validate,
417         .create = mlx5_flow_create,
418         .destroy = mlx5_flow_destroy,
419         .flush = mlx5_flow_flush,
420 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
421         .query = mlx5_flow_query,
422 #else
423         .query = NULL,
424 #endif
425         .isolate = mlx5_flow_isolate,
426 };
427
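/*
 * For context, these callbacks are reached through the generic rte_flow
 * API; e.g. an application call such as (port_id and the arguments are
 * hypothetical):
 *
 *   struct rte_flow_error error;
 *   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                            actions, &error);
 *
 * ends up in mlx5_flow_create() via the .create handler above.
 */
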
428 /* Convert FDIR request to Generic flow. */
429 struct mlx5_fdir {
430         struct rte_flow_attr attr;
431         struct rte_flow_action actions[2];
432         struct rte_flow_item items[4];
433         struct rte_flow_item_eth l2;
434         struct rte_flow_item_eth l2_mask;
435         union {
436                 struct rte_flow_item_ipv4 ipv4;
437                 struct rte_flow_item_ipv6 ipv6;
438         } l3;
439         union {
440                 struct rte_flow_item_udp udp;
441                 struct rte_flow_item_tcp tcp;
442         } l4;
443         struct rte_flow_action_queue queue;
444 };
445
446 /* Verbs specification header. */
447 struct ibv_spec_header {
448         enum ibv_flow_spec_type type;
449         uint16_t size;
450 };
451
452 /**
453  * Check support for a given item.
454  *
455  * @param item[in]
456  *   Item specification.
457  * @param mask[in]
458  *   Bit-masks covering supported fields to compare with spec, last and mask in
459  *   \item.
460  * @param size
461  *   Bit-Mask size in bytes.
462  *
463  * @return
464  *   0 on success.
465  */
466 static int
467 mlx5_flow_item_validate(const struct rte_flow_item *item,
468                         const uint8_t *mask, unsigned int size)
469 {
470         int ret = 0;
471
472         if (!item->spec && (item->mask || item->last))
473                 return -1;
474         if (item->spec && !item->mask) {
475                 unsigned int i;
476                 const uint8_t *spec = item->spec;
477
478                 for (i = 0; i < size; ++i)
479                         if ((spec[i] | mask[i]) != mask[i])
480                                 return -1;
481         }
482         if (item->last && !item->mask) {
483                 unsigned int i;
484                 const uint8_t *spec = item->last;
485
486                 for (i = 0; i < size; ++i)
487                         if ((spec[i] | mask[i]) != mask[i])
488                                 return -1;
489         }
490         if (item->mask) {
491                 unsigned int i;
492                 const uint8_t *spec = item->mask;
493
494                 for (i = 0; i < size; ++i)
495                         if ((spec[i] | mask[i]) != mask[i])
496                                 return -1;
497         }
498         if (item->spec && item->last) {
499                 uint8_t spec[size];
500                 uint8_t last[size];
501                 const uint8_t *apply = mask;
502                 unsigned int i;
503
504                 if (item->mask)
505                         apply = item->mask;
506                 for (i = 0; i < size; ++i) {
507                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
508                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
509                 }
510                 ret = memcmp(spec, last, size);
511         }
512         return ret;
513 }
514
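/*
 * Illustrative call, mirroring how priv_flow_convert_items_validate()
 * uses this helper: the item is checked against the PMD-supported
 * bit-mask declared for its type in mlx5_flow_items[].
 *
 *   err = mlx5_flow_item_validate(item,
 *                 (const uint8_t *)mlx5_flow_items[item->type].mask,
 *                 mlx5_flow_items[item->type].mask_sz);
 *
 * A non-zero return means spec/last/mask request bits the device cannot
 * match, or that the spec/last range is inconsistent under the mask.
 */
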
515 /**
516  * Copy the RSS configuration from the user one; if rss_conf is NULL,
517  * use the driver default.
518  *
519  * @param priv
520  *   Pointer to private structure.
521  * @param parser
522  *   Internal parser structure.
523  * @param rss_conf
524  *   User RSS configuration to save.
525  *
526  * @return
527  *   0 on success, errno value on failure.
528  */
529 static int
530 priv_flow_convert_rss_conf(struct priv *priv,
531                            struct mlx5_flow_parse *parser,
532                            const struct rte_eth_rss_conf *rss_conf)
533 {
534         /*
535          * This function is also called at the beginning of
536          * priv_flow_convert_actions() to initialize the parser with the
537          * device default RSS configuration.
538          */
539         (void)priv;
540         if (rss_conf) {
541                 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
542                         return EINVAL;
543                 if (rss_conf->rss_key_len != 40)
544                         return EINVAL;
545                 if (rss_conf->rss_key_len && rss_conf->rss_key) {
546                         parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
547                         memcpy(parser->rss_key, rss_conf->rss_key,
548                                rss_conf->rss_key_len);
549                         parser->rss_conf.rss_key = parser->rss_key;
550                 }
551                 parser->rss_conf.rss_hf = rss_conf->rss_hf;
552         }
553         return 0;
554 }
555
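/*
 * Sketch of an RSS configuration the checks above accept (values are
 * assumptions for the example): the key must be exactly 40 bytes and
 * rss_hf must not contain bits set in MLX5_RSS_HF_MASK.
 */
static uint8_t example_rss_key[40];
static const struct rte_eth_rss_conf example_rss_conf = {
	.rss_key = example_rss_key,
	.rss_key_len = 40,
	.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
};
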
556 /**
557  * Extract attribute to the parser.
558  *
559  * @param priv
560  *   Pointer to private structure.
561  * @param[in] attr
562  *   Flow rule attributes.
563  * @param[out] error
564  *   Perform verbose error reporting if not NULL.
565  * @param[in, out] parser
566  *   Internal parser structure.
567  *
568  * @return
569  *   0 on success, a negative errno value otherwise and rte_errno is set.
570  */
571 static int
572 priv_flow_convert_attributes(struct priv *priv,
573                              const struct rte_flow_attr *attr,
574                              struct rte_flow_error *error,
575                              struct mlx5_flow_parse *parser)
576 {
577         (void)priv;
578         (void)parser;
579         if (attr->group) {
580                 rte_flow_error_set(error, ENOTSUP,
581                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
582                                    NULL,
583                                    "groups are not supported");
584                 return -rte_errno;
585         }
586         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
587                 rte_flow_error_set(error, ENOTSUP,
588                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
589                                    NULL,
590                                    "priorities are not supported");
591                 return -rte_errno;
592         }
593         if (attr->egress) {
594                 rte_flow_error_set(error, ENOTSUP,
595                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
596                                    NULL,
597                                    "egress is not supported");
598                 return -rte_errno;
599         }
600         if (!attr->ingress) {
601                 rte_flow_error_set(error, ENOTSUP,
602                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
603                                    NULL,
604                                    "only ingress is supported");
605                 return -rte_errno;
606         }
607         return 0;
608 }
609
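/*
 * Example of attributes the checks above accept (illustrative only):
 * an ingress rule in group 0 with the default priority. Egress rules,
 * non-zero groups and other priorities are rejected with ENOTSUP.
 */
static const struct rte_flow_attr example_attr = {
	.ingress = 1,
};
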
610 /**
611  * Extract actions request to the parser.
612  *
613  * @param priv
614  *   Pointer to private structure.
615  * @param[in] actions
616  *   Associated actions (list terminated by the END action).
617  * @param[out] error
618  *   Perform verbose error reporting if not NULL.
619  * @param[in, out] parser
620  *   Internal parser structure.
621  *
622  * @return
623  *   0 on success, a negative errno value otherwise and rte_errno is set.
624  */
625 static int
626 priv_flow_convert_actions(struct priv *priv,
627                           const struct rte_flow_action actions[],
628                           struct rte_flow_error *error,
629                           struct mlx5_flow_parse *parser)
630 {
631         /*
632          * Add default RSS configuration necessary for Verbs to create QP even
633          * if no RSS is necessary.
634          */
635         priv_flow_convert_rss_conf(priv, parser,
636                                    (const struct rte_eth_rss_conf *)
637                                    &priv->rss_conf);
638         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
639                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
640                         continue;
641                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
642                         parser->drop = 1;
643                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
644                         const struct rte_flow_action_queue *queue =
645                                 (const struct rte_flow_action_queue *)
646                                 actions->conf;
647                         uint16_t n;
648                         uint16_t found = 0;
649
650                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
651                                 goto exit_action_not_supported;
652                         for (n = 0; n < parser->queues_n; ++n) {
653                                 if (parser->queues[n] == queue->index) {
654                                         found = 1;
655                                         break;
656                                 }
657                         }
658                         if (parser->queues_n > 1 && !found) {
659                                 rte_flow_error_set(error, ENOTSUP,
660                                            RTE_FLOW_ERROR_TYPE_ACTION,
661                                            actions,
662                                            "queue action not in RSS queues");
663                                 return -rte_errno;
664                         }
665                         if (!found) {
666                                 parser->queues_n = 1;
667                                 parser->queues[0] = queue->index;
668                         }
669                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
670                         const struct rte_flow_action_rss *rss =
671                                 (const struct rte_flow_action_rss *)
672                                 actions->conf;
673                         uint16_t n;
674
675                         if (!rss || !rss->num) {
676                                 rte_flow_error_set(error, EINVAL,
677                                                    RTE_FLOW_ERROR_TYPE_ACTION,
678                                                    actions,
679                                                    "no valid queues");
680                                 return -rte_errno;
681                         }
682                         if (parser->queues_n == 1) {
683                                 uint16_t found = 0;
684
685                                 assert(parser->queues_n);
686                                 for (n = 0; n < rss->num; ++n) {
687                                         if (parser->queues[0] ==
688                                             rss->queue[n]) {
689                                                 found = 1;
690                                                 break;
691                                         }
692                                 }
693                                 if (!found) {
694                                         rte_flow_error_set(error, ENOTSUP,
695                                                    RTE_FLOW_ERROR_TYPE_ACTION,
696                                                    actions,
697                                                    "queue action not in RSS"
698                                                    " queues");
699                                         return -rte_errno;
700                                 }
701                         }
702                         for (n = 0; n < rss->num; ++n) {
703                                 if (rss->queue[n] >= priv->rxqs_n) {
704                                         rte_flow_error_set(error, EINVAL,
705                                                    RTE_FLOW_ERROR_TYPE_ACTION,
706                                                    actions,
707                                                    "queue id > number of"
708                                                    " queues");
709                                         return -rte_errno;
710                                 }
711                         }
712                         for (n = 0; n < rss->num; ++n)
713                                 parser->queues[n] = rss->queue[n];
714                         parser->queues_n = rss->num;
715                         if (priv_flow_convert_rss_conf(priv, parser,
716                                                        rss->rss_conf)) {
717                                 rte_flow_error_set(error, EINVAL,
718                                                    RTE_FLOW_ERROR_TYPE_ACTION,
719                                                    actions,
720                                                    "wrong RSS configuration");
721                                 return -rte_errno;
722                         }
723                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
724                         const struct rte_flow_action_mark *mark =
725                                 (const struct rte_flow_action_mark *)
726                                 actions->conf;
727
728                         if (!mark) {
729                                 rte_flow_error_set(error, EINVAL,
730                                                    RTE_FLOW_ERROR_TYPE_ACTION,
731                                                    actions,
732                                                    "mark must be defined");
733                                 return -rte_errno;
734                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
735                                 rte_flow_error_set(error, ENOTSUP,
736                                                    RTE_FLOW_ERROR_TYPE_ACTION,
737                                                    actions,
738                                                    "mark must be between 0"
739                                                    " and 16777199");
740                                 return -rte_errno;
741                         }
742                         parser->mark = 1;
743                         parser->mark_id = mark->id;
744                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
745                         parser->mark = 1;
746                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
747                            priv->config.flow_counter_en) {
748                         parser->count = 1;
749                 } else {
750                         goto exit_action_not_supported;
751                 }
752         }
753         if (parser->drop && parser->mark)
754                 parser->mark = 0;
755         if (!parser->queues_n && !parser->drop) {
756                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
757                                    NULL, "no valid action");
758                 return -rte_errno;
759         }
760         return 0;
761 exit_action_not_supported:
762         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
763                            actions, "action not supported");
764         return -rte_errno;
765 }
766
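/*
 * Illustrative action list, not part of the driver: mark packets with an
 * application-chosen id and steer them to Rx queue 0 (the id and queue
 * index are assumptions for the example).
 */
static const struct rte_flow_action_mark example_mark = { .id = 42 };
static const struct rte_flow_action_queue example_queue = { .index = 0 };
static const struct rte_flow_action example_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &example_mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
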
767 /**
768  * Validate items.
769  *
770  * @param priv
771  *   Pointer to private structure.
772  * @param[in] items
773  *   Pattern specification (list terminated by the END pattern item).
774  * @param[out] error
775  *   Perform verbose error reporting if not NULL.
776  * @param[in, out] parser
777  *   Internal parser structure.
778  *
779  * @return
780  *   0 on success, a negative errno value otherwise and rte_errno is set.
781  */
782 static int
783 priv_flow_convert_items_validate(struct priv *priv,
784                                  const struct rte_flow_item items[],
785                                  struct rte_flow_error *error,
786                                  struct mlx5_flow_parse *parser)
787 {
788         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
789         unsigned int i;
790
791         (void)priv;
792         /* Initialise the offsets to start after verbs attribute. */
793         for (i = 0; i != hash_rxq_init_n; ++i)
794                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
795         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
796                 const struct mlx5_flow_items *token = NULL;
797                 unsigned int n;
798                 int err;
799
800                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
801                         continue;
802                 for (i = 0;
803                      cur_item->items &&
804                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
805                      ++i) {
806                         if (cur_item->items[i] == items->type) {
807                                 token = &mlx5_flow_items[items->type];
808                                 break;
809                         }
810                 }
811                 if (!token)
812                         goto exit_item_not_supported;
813                 cur_item = token;
814                 err = mlx5_flow_item_validate(items,
815                                               (const uint8_t *)cur_item->mask,
816                                               cur_item->mask_sz);
817                 if (err)
818                         goto exit_item_not_supported;
819                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
820                         if (parser->inner) {
821                                 rte_flow_error_set(error, ENOTSUP,
822                                                    RTE_FLOW_ERROR_TYPE_ITEM,
823                                                    items,
824                                                    "cannot recognize multiple"
825                                                    " VXLAN encapsulations");
826                                 return -rte_errno;
827                         }
828                         parser->inner = IBV_FLOW_SPEC_INNER;
829                 }
830                 if (parser->drop) {
831                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
832                 } else {
833                         for (n = 0; n != hash_rxq_init_n; ++n)
834                                 parser->queue[n].offset += cur_item->dst_sz;
835                 }
836         }
837         if (parser->drop) {
838                 parser->queue[HASH_RXQ_ETH].offset +=
839                         sizeof(struct ibv_flow_spec_action_drop);
840         }
841         if (parser->mark) {
842                 for (i = 0; i != hash_rxq_init_n; ++i)
843                         parser->queue[i].offset +=
844                                 sizeof(struct ibv_flow_spec_action_tag);
845         }
846         if (parser->count) {
847                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
848
849                 for (i = 0; i != hash_rxq_init_n; ++i)
850                         parser->queue[i].offset += size;
851         }
852         return 0;
853 exit_item_not_supported:
854         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
855                            items, "item not supported");
856         return -rte_errno;
857 }
858
859 /**
860  * Allocate memory space to store verbs flow attributes.
861  *
862  * @param priv
863  *   Pointer to private structure.
864  * @param[in] priority
865  *   Flow priority.
866  * @param[in] size
867  *   Number of bytes to allocate.
868  * @param[out] error
869  *   Perform verbose error reporting if not NULL.
870  *
871  * @return
872  *   A verbs flow attribute on success, NULL otherwise.
873  */
874 static struct ibv_flow_attr*
875 priv_flow_convert_allocate(struct priv *priv,
876                            unsigned int priority,
877                            unsigned int size,
878                            struct rte_flow_error *error)
879 {
880         struct ibv_flow_attr *ibv_attr;
881
882         (void)priv;
883         ibv_attr = rte_calloc(__func__, 1, size, 0);
884         if (!ibv_attr) {
885                 rte_flow_error_set(error, ENOMEM,
886                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
887                                    NULL,
888                                    "cannot allocate verbs spec attributes.");
889                 return NULL;
890         }
891         ibv_attr->priority = priority;
892         return ibv_attr;
893 }
894
895 /**
896  * Finalise verbs flow attributes.
897  *
898  * @param priv
899  *   Pointer to private structure.
900  * @param[in, out] parser
901  *   Internal parser structure.
902  */
903 static void
904 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
905 {
906         const unsigned int ipv4 =
907                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
908         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
909         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
910         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
911         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
912         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
913         unsigned int i;
914
915         (void)priv;
916         if (parser->layer == HASH_RXQ_ETH) {
917                 goto fill;
918         } else {
919                 /*
920                  * This layer becomes useless as the pattern defines
921                  * deeper (more specific) layers.
922                  */
923                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
924                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
925         }
926         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
927         for (i = ohmin; i != (ohmax + 1); ++i) {
928                 if (!parser->queue[i].ibv_attr)
929                         continue;
930                 rte_free(parser->queue[i].ibv_attr);
931                 parser->queue[i].ibv_attr = NULL;
932         }
933         /* Remove impossible flow according to the RSS configuration. */
934         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
935             parser->rss_conf.rss_hf) {
936                 /* Remove any other flow. */
937                 for (i = hmin; i != (hmax + 1); ++i) {
938                         if ((i == parser->layer) ||
939                              (!parser->queue[i].ibv_attr))
940                                 continue;
941                         rte_free(parser->queue[i].ibv_attr);
942                         parser->queue[i].ibv_attr = NULL;
943                 }
944         } else  if (!parser->queue[ip].ibv_attr) {
945                 /* no RSS possible with the current configuration. */
946                 parser->queues_n = 1;
947                 return;
948         }
949 fill:
950         /*
951          * Fill missing layers in verbs specifications, or compute the correct
952          * offset to allocate the memory space for the attributes and
953          * specifications.
954          */
955         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
956                 union {
957                         struct ibv_flow_spec_ipv4_ext ipv4;
958                         struct ibv_flow_spec_ipv6 ipv6;
959                         struct ibv_flow_spec_tcp_udp udp_tcp;
960                 } specs;
961                 void *dst;
962                 uint16_t size;
963
964                 if (i == parser->layer)
965                         continue;
966                 if (parser->layer == HASH_RXQ_ETH) {
967                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
968                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
969                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
970                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
971                                         .size = size,
972                                 };
973                         } else {
974                                 size = sizeof(struct ibv_flow_spec_ipv6);
975                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
976                                         .type = IBV_FLOW_SPEC_IPV6,
977                                         .size = size,
978                                 };
979                         }
980                         if (parser->queue[i].ibv_attr) {
981                                 dst = (void *)((uintptr_t)
982                                                parser->queue[i].ibv_attr +
983                                                parser->queue[i].offset);
984                                 memcpy(dst, &specs, size);
985                                 ++parser->queue[i].ibv_attr->num_of_specs;
986                         }
987                         parser->queue[i].offset += size;
988                 }
989                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
990                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
991                         size = sizeof(struct ibv_flow_spec_tcp_udp);
992                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
993                                 .type = ((i == HASH_RXQ_UDPV4 ||
994                                           i == HASH_RXQ_UDPV6) ?
995                                          IBV_FLOW_SPEC_UDP :
996                                          IBV_FLOW_SPEC_TCP),
997                                 .size = size,
998                         };
999                         if (parser->queue[i].ibv_attr) {
1000                                 dst = (void *)((uintptr_t)
1001                                                parser->queue[i].ibv_attr +
1002                                                parser->queue[i].offset);
1003                                 memcpy(dst, &specs, size);
1004                                 ++parser->queue[i].ibv_attr->num_of_specs;
1005                         }
1006                         parser->queue[i].offset += size;
1007                 }
1008         }
1009 }
1010
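/*
 * Example of the completion performed above (illustrative): an ETH-only
 * pattern expanded for HASH_RXQ_TCPV4 gets an empty IBV_FLOW_SPEC_IPV4_EXT
 * spec followed by an empty IBV_FLOW_SPEC_TCP spec appended, so that the
 * Verbs attribute always covers every layer its hash fields require.
 */
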
1011 /**
1012  * Validate and convert a flow supported by the NIC.
1013  *
1014  * @param priv
1015  *   Pointer to private structure.
1016  * @param[in] attr
1017  *   Flow rule attributes.
1018  * @param[in] pattern
1019  *   Pattern specification (list terminated by the END pattern item).
1020  * @param[in] actions
1021  *   Associated actions (list terminated by the END action).
1022  * @param[out] error
1023  *   Perform verbose error reporting if not NULL.
1024  * @param[in, out] parser
1025  *   Internal parser structure.
1026  *
1027  * @return
1028  *   0 on success, a negative errno value otherwise and rte_errno is set.
1029  */
1030 static int
1031 priv_flow_convert(struct priv *priv,
1032                   const struct rte_flow_attr *attr,
1033                   const struct rte_flow_item items[],
1034                   const struct rte_flow_action actions[],
1035                   struct rte_flow_error *error,
1036                   struct mlx5_flow_parse *parser)
1037 {
1038         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1039         unsigned int i;
1040         int ret;
1041
1042         /* First step. Validate the attributes, items and actions. */
1043         *parser = (struct mlx5_flow_parse){
1044                 .create = parser->create,
1045                 .layer = HASH_RXQ_ETH,
1046                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1047         };
1048         ret = priv_flow_convert_attributes(priv, attr, error, parser);
1049         if (ret)
1050                 return ret;
1051         ret = priv_flow_convert_actions(priv, actions, error, parser);
1052         if (ret)
1053                 return ret;
1054         ret = priv_flow_convert_items_validate(priv, items, error, parser);
1055         if (ret)
1056                 return ret;
1057         priv_flow_convert_finalise(priv, parser);
1058         /*
1059          * Second step.
1060          * Allocate the memory space to store verbs specifications.
1061          */
1062         if (parser->drop) {
1063                 unsigned int priority =
1064                         attr->priority +
1065                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1066                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1067
1068                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1069                         priv_flow_convert_allocate(priv, priority,
1070                                                    offset, error);
1071                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1072                         return ENOMEM;
1073                 parser->queue[HASH_RXQ_ETH].offset =
1074                         sizeof(struct ibv_flow_attr);
1075         } else {
1076                 for (i = 0; i != hash_rxq_init_n; ++i) {
1077                         unsigned int priority =
1078                                 attr->priority +
1079                                 hash_rxq_init[i].flow_priority;
1080                         unsigned int offset;
1081
1082                         if (!(parser->rss_conf.rss_hf &
1083                               hash_rxq_init[i].dpdk_rss_hf) &&
1084                             (i != HASH_RXQ_ETH))
1085                                 continue;
1086                         offset = parser->queue[i].offset;
1087                         parser->queue[i].ibv_attr =
1088                                 priv_flow_convert_allocate(priv, priority,
1089                                                            offset, error);
1090                         if (!parser->queue[i].ibv_attr)
1091                                 goto exit_enomem;
1092                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1093                 }
1094         }
1095         /* Third step. Conversion parse, fill the specifications. */
1096         parser->inner = 0;
1097         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1098                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1099                         continue;
1100                 cur_item = &mlx5_flow_items[items->type];
1101                 ret = cur_item->convert(items,
1102                                         (cur_item->default_mask ?
1103                                          cur_item->default_mask :
1104                                          cur_item->mask),
1105                                         parser);
1106                 if (ret) {
1107                         rte_flow_error_set(error, ret,
1108                                            RTE_FLOW_ERROR_TYPE_ITEM,
1109                                            items, "item not supported");
1110                         goto exit_free;
1111                 }
1112         }
1113         if (parser->mark)
1114                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1115         if (parser->count && parser->create) {
1116                 mlx5_flow_create_count(priv, parser);
1117                 if (!parser->cs)
1118                         goto exit_count_error;
1119         }
1120         /*
1121          * Last step. Complete missing specification to reach the RSS
1122          * configuration.
1123          */
1124         if (!parser->drop) {
1125                 priv_flow_convert_finalise(priv, parser);
1126         } else {
1127                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1128                         attr->priority +
1129                         hash_rxq_init[parser->layer].flow_priority;
1130         }
1131         if (parser->allmulti &&
1132             parser->layer == HASH_RXQ_ETH) {
1133                 for (i = 0; i != hash_rxq_init_n; ++i) {
1134                         if (!parser->queue[i].ibv_attr)
1135                                 continue;
1136                         if (parser->queue[i].ibv_attr->num_of_specs != 1)
1137                                 break;
1138                         parser->queue[i].ibv_attr->type =
1139                                                 IBV_FLOW_ATTR_MC_DEFAULT;
1140                 }
1141         }
1142 exit_free:
1143         /* Only verification is expected, all resources should be released. */
1144         if (!parser->create) {
1145                 for (i = 0; i != hash_rxq_init_n; ++i) {
1146                         if (parser->queue[i].ibv_attr) {
1147                                 rte_free(parser->queue[i].ibv_attr);
1148                                 parser->queue[i].ibv_attr = NULL;
1149                         }
1150                 }
1151         }
1152         return ret;
1153 exit_enomem:
1154         for (i = 0; i != hash_rxq_init_n; ++i) {
1155                 if (parser->queue[i].ibv_attr) {
1156                         rte_free(parser->queue[i].ibv_attr);
1157                         parser->queue[i].ibv_attr = NULL;
1158                 }
1159         }
1160         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1161                            NULL, "cannot allocate verbs spec attributes.");
1162         return ret;
1163 exit_count_error:
1164         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1165                            NULL, "cannot create counter.");
1166         return rte_errno;
1167 }
1168
1169 /**
1170  * Copy the specification created into the flow.
1171  *
1172  * @param parser
1173  *   Internal parser structure.
1174  * @param src
1175  *   Created specification.
1176  * @param size
1177  *   Size in bytes of the specification to copy.
1178  */
1179 static void
1180 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1181                       unsigned int size)
1182 {
1183         unsigned int i;
1184         void *dst;
1185
1186         for (i = 0; i != hash_rxq_init_n; ++i) {
1187                 if (!parser->queue[i].ibv_attr)
1188                         continue;
1189                 /* Specification must be the same l3 type or none. */
1190                 if (parser->layer == HASH_RXQ_ETH ||
1191                     (hash_rxq_init[parser->layer].ip_version ==
1192                      hash_rxq_init[i].ip_version) ||
1193                     (hash_rxq_init[i].ip_version == 0)) {
1194                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1195                                         parser->queue[i].offset);
1196                         memcpy(dst, src, size);
1197                         ++parser->queue[i].ibv_attr->num_of_specs;
1198                         parser->queue[i].offset += size;
1199                 }
1200         }
1201 }
1202
1203 /**
1204  * Convert Ethernet item to Verbs specification.
1205  *
1206  * @param item[in]
1207  *   Item specification.
1208  * @param default_mask[in]
1209  *   Default bit-masks to use when item->mask is not provided.
1210  * @param data[in, out]
1211  *   User structure.
1212  */
1213 static int
1214 mlx5_flow_create_eth(const struct rte_flow_item *item,
1215                      const void *default_mask,
1216                      void *data)
1217 {
1218         const struct rte_flow_item_eth *spec = item->spec;
1219         const struct rte_flow_item_eth *mask = item->mask;
1220         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1221         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1222         struct ibv_flow_spec_eth eth = {
1223                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1224                 .size = eth_size,
1225         };
1226
1227         /* Don't update layer for the inner pattern. */
1228         if (!parser->inner)
1229                 parser->layer = HASH_RXQ_ETH;
1230         if (spec) {
1231                 unsigned int i;
1232
1233                 if (!mask)
1234                         mask = default_mask;
1235                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1236                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1237                 eth.val.ether_type = spec->type;
1238                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1239                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1240                 eth.mask.ether_type = mask->type;
1241                 /* Remove unwanted bits from values. */
1242                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1243                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1244                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1245                 }
1246                 eth.val.ether_type &= eth.mask.ether_type;
1247         }
1248         mlx5_flow_create_copy(parser, &eth, eth_size);
1249         parser->allmulti = eth.val.dst_mac[0] & 1;
1250         return 0;
1251 }
1252
1253 /**
1254  * Convert VLAN item to Verbs specification.
1255  *
1256  * @param item[in]
1257  *   Item specification.
1258  * @param default_mask[in]
1259  *   Default bit-masks to use when item->mask is not provided.
1260  * @param data[in, out]
1261  *   User structure.
1262  */
1263 static int
1264 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1265                       const void *default_mask,
1266                       void *data)
1267 {
1268         const struct rte_flow_item_vlan *spec = item->spec;
1269         const struct rte_flow_item_vlan *mask = item->mask;
1270         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1271         struct ibv_flow_spec_eth *eth;
1272         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1273
1274         if (spec) {
1275                 unsigned int i;
1276                 if (!mask)
1277                         mask = default_mask;
1278
1279                 for (i = 0; i != hash_rxq_init_n; ++i) {
1280                         if (!parser->queue[i].ibv_attr)
1281                                 continue;
1282
1283                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1284                                        parser->queue[i].offset - eth_size);
1285                         eth->val.vlan_tag = spec->tci;
1286                         eth->mask.vlan_tag = mask->tci;
1287                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1288                 }
1289         }
1290         return 0;
1291 }
1292
1293 /**
1294  * Convert IPv4 item to Verbs specification.
1295  *
1296  * @param item[in]
1297  *   Item specification.
1298  * @param default_mask[in]
1299  *   Default bit-masks to use when item->mask is not provided.
1300  * @param data[in, out]
1301  *   User structure.
1302  */
1303 static int
1304 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1305                       const void *default_mask,
1306                       void *data)
1307 {
1308         const struct rte_flow_item_ipv4 *spec = item->spec;
1309         const struct rte_flow_item_ipv4 *mask = item->mask;
1310         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1311         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1312         struct ibv_flow_spec_ipv4_ext ipv4 = {
1313                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1314                 .size = ipv4_size,
1315         };
1316
1317         /* Don't update layer for the inner pattern. */
1318         if (!parser->inner)
1319                 parser->layer = HASH_RXQ_IPV4;
1320         if (spec) {
1321                 if (!mask)
1322                         mask = default_mask;
1323                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1324                         .src_ip = spec->hdr.src_addr,
1325                         .dst_ip = spec->hdr.dst_addr,
1326                         .proto = spec->hdr.next_proto_id,
1327                         .tos = spec->hdr.type_of_service,
1328                 };
1329                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1330                         .src_ip = mask->hdr.src_addr,
1331                         .dst_ip = mask->hdr.dst_addr,
1332                         .proto = mask->hdr.next_proto_id,
1333                         .tos = mask->hdr.type_of_service,
1334                 };
1335                 /* Remove unwanted bits from values. */
1336                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1337                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1338                 ipv4.val.proto &= ipv4.mask.proto;
1339                 ipv4.val.tos &= ipv4.mask.tos;
1340         }
1341         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1342         return 0;
1343 }
1344
1345 /**
1346  * Convert IPv6 item to Verbs specification.
1347  *
1348  * @param item[in]
1349  *   Item specification.
1350  * @param default_mask[in]
1351  *   Default bit-masks to use when item->mask is not provided.
1352  * @param data[in, out]
1353  *   User structure.
1354  */
1355 static int
1356 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1357                       const void *default_mask,
1358                       void *data)
1359 {
1360         const struct rte_flow_item_ipv6 *spec = item->spec;
1361         const struct rte_flow_item_ipv6 *mask = item->mask;
1362         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1363         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1364         struct ibv_flow_spec_ipv6 ipv6 = {
1365                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1366                 .size = ipv6_size,
1367         };
1368
1369         /* Don't update layer for the inner pattern. */
1370         if (!parser->inner)
1371                 parser->layer = HASH_RXQ_IPV6;
1372         if (spec) {
1373                 unsigned int i;
1374                 uint32_t vtc_flow_val;
1375                 uint32_t vtc_flow_mask;
1376
1377                 if (!mask)
1378                         mask = default_mask;
1379                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1380                        RTE_DIM(ipv6.val.src_ip));
1381                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1382                        RTE_DIM(ipv6.val.dst_ip));
1383                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1384                        RTE_DIM(ipv6.mask.src_ip));
1385                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1386                        RTE_DIM(ipv6.mask.dst_ip));
1387                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1388                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1389                 ipv6.val.flow_label =
1390                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1391                                          IPV6_HDR_FL_SHIFT);
1392                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1393                                          IPV6_HDR_TC_SHIFT;
1394                 ipv6.val.next_hdr = spec->hdr.proto;
1395                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1396                 ipv6.mask.flow_label =
1397                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1398                                          IPV6_HDR_FL_SHIFT);
1399                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1400                                           IPV6_HDR_TC_SHIFT;
1401                 ipv6.mask.next_hdr = mask->hdr.proto;
1402                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1403                 /* Remove unwanted bits from values. */
1404                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1405                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1406                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1407                 }
1408                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1409                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1410                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1411                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1412         }
1413         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1414         return 0;
1415 }
1416
1417 /**
1418  * Convert UDP item to Verbs specification.
1419  *
1420  * @param item[in]
1421  *   Item specification.
1422  * @param default_mask[in]
1423  *   Default bit-masks to use when item->mask is not provided.
1424  * @param data[in, out]
1425  *   User structure.
1426  */
1427 static int
1428 mlx5_flow_create_udp(const struct rte_flow_item *item,
1429                      const void *default_mask,
1430                      void *data)
1431 {
1432         const struct rte_flow_item_udp *spec = item->spec;
1433         const struct rte_flow_item_udp *mask = item->mask;
1434         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1435         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1436         struct ibv_flow_spec_tcp_udp udp = {
1437                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1438                 .size = udp_size,
1439         };
1440
1441         /* Don't update layer for the inner pattern. */
1442         if (!parser->inner) {
1443                 if (parser->layer == HASH_RXQ_IPV4)
1444                         parser->layer = HASH_RXQ_UDPV4;
1445                 else
1446                         parser->layer = HASH_RXQ_UDPV6;
1447         }
1448         if (spec) {
1449                 if (!mask)
1450                         mask = default_mask;
1451                 udp.val.dst_port = spec->hdr.dst_port;
1452                 udp.val.src_port = spec->hdr.src_port;
1453                 udp.mask.dst_port = mask->hdr.dst_port;
1454                 udp.mask.src_port = mask->hdr.src_port;
1455                 /* Remove unwanted bits from values. */
1456                 udp.val.src_port &= udp.mask.src_port;
1457                 udp.val.dst_port &= udp.mask.dst_port;
1458         }
1459         mlx5_flow_create_copy(parser, &udp, udp_size);
1460         return 0;
1461 }
1462
1463 /**
1464  * Convert TCP item to Verbs specification.
1465  *
1466  * @param item[in]
1467  *   Item specification.
1468  * @param default_mask[in]
1469  *   Default bit-masks to use when item->mask is not provided.
1470  * @param data[in, out]
1471  *   User structure.
1472  */
1473 static int
1474 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1475                      const void *default_mask,
1476                      void *data)
1477 {
1478         const struct rte_flow_item_tcp *spec = item->spec;
1479         const struct rte_flow_item_tcp *mask = item->mask;
1480         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1481         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1482         struct ibv_flow_spec_tcp_udp tcp = {
1483                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1484                 .size = tcp_size,
1485         };
1486
1487         /* Don't update layer for the inner pattern. */
1488         if (!parser->inner) {
1489                 if (parser->layer == HASH_RXQ_IPV4)
1490                         parser->layer = HASH_RXQ_TCPV4;
1491                 else
1492                         parser->layer = HASH_RXQ_TCPV6;
1493         }
1494         if (spec) {
1495                 if (!mask)
1496                         mask = default_mask;
1497                 tcp.val.dst_port = spec->hdr.dst_port;
1498                 tcp.val.src_port = spec->hdr.src_port;
1499                 tcp.mask.dst_port = mask->hdr.dst_port;
1500                 tcp.mask.src_port = mask->hdr.src_port;
1501                 /* Remove unwanted bits from values. */
1502                 tcp.val.src_port &= tcp.mask.src_port;
1503                 tcp.val.dst_port &= tcp.mask.dst_port;
1504         }
1505         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1506         return 0;
1507 }
1508
1509 /**
1510  * Convert VXLAN item to Verbs specification.
1511  *
1512  * @param item[in]
1513  *   Item specification.
1514  * @param default_mask[in]
1515  *   Default bit-masks to use when item->mask is not provided.
1516  * @param data[in, out]
1517  *   User structure.
      *
      * @return
      *   0 on success, EINVAL if the VXLAN network identifier is missing or zero.
1518  */
1519 static int
1520 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1521                        const void *default_mask,
1522                        void *data)
1523 {
1524         const struct rte_flow_item_vxlan *spec = item->spec;
1525         const struct rte_flow_item_vxlan *mask = item->mask;
1526         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1527         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1528         struct ibv_flow_spec_tunnel vxlan = {
1529                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1530                 .size = size,
1531         };
1532         union vni {
1533                 uint32_t vlan_id;
1534                 uint8_t vni[4];
1535         } id;
1536
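             /*
              * The 24-bit VNI is copied into bytes 1-3 of the 32-bit tunnel
              * id; byte 0 is kept cleared.
              */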
1537         id.vni[0] = 0;
1538         parser->inner = IBV_FLOW_SPEC_INNER;
1539         if (spec) {
1540                 if (!mask)
1541                         mask = default_mask;
1542                 memcpy(&id.vni[1], spec->vni, 3);
1543                 vxlan.val.tunnel_id = id.vlan_id;
1544                 memcpy(&id.vni[1], mask->vni, 3);
1545                 vxlan.mask.tunnel_id = id.vlan_id;
1546                 /* Remove unwanted bits from values. */
1547                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1548         }
1549         /*
1550          * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
1551          * layer is defined in the Verbs specification, it is interpreted as a
1552          * wildcard and all packets will match this rule. If it follows a full
1553          * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1554          * preceding layers will also match this rule.
1555          * To avoid such a situation, VNI 0 is currently refused.
1556          */
1557         if (!vxlan.val.tunnel_id)
1558                 return EINVAL;
1559         mlx5_flow_create_copy(parser, &vxlan, size);
1560         return 0;
1561 }
1562
1563 /**
1564  * Convert mark/flag action to Verbs specification.
1565  *
1566  * @param parser
1567  *   Internal parser structure.
1568  * @param mark_id
1569  *   Mark identifier.
1570  */
1571 static int
1572 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1573 {
1574         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1575         struct ibv_flow_spec_action_tag tag = {
1576                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1577                 .size = size,
1578                 .tag_id = mlx5_flow_mark_set(mark_id),
1579         };
1580
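             /* The tag value above is the mark id encoded by mlx5_flow_mark_set(). */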
1581         assert(parser->mark);
1582         mlx5_flow_create_copy(parser, &tag, size);
1583         return 0;
1584 }
1585
1586 /**
1587  * Convert count action to Verbs specification.
1588  *
1589  * @param priv
1590  *   Pointer to private structure.
1591  * @param parser
1592  *   Pointer to MLX5 flow parser structure.
1593  *
1594  * @return
1595  *   0 on success, errno value on failure.
1596  */
1597 static int
1598 mlx5_flow_create_count(struct priv *priv __rte_unused,
1599                        struct mlx5_flow_parse *parser __rte_unused)
1600 {
1601 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1602         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1603         struct ibv_counter_set_init_attr init_attr = {0};
1604         struct ibv_flow_spec_counter_action counter = {
1605                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1606                 .size = size,
1607                 .counter_set_handle = 0,
1608         };
1609
1610         init_attr.counter_set_id = 0;
1611         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1612         if (!parser->cs)
1613                 return EINVAL;
1614         counter.counter_set_handle = parser->cs->handle;
1615         mlx5_flow_create_copy(parser, &counter, size);
1616 #endif
1617         return 0;
1618 }
1619
1620 /**
1621  * Complete flow rule creation with a drop queue.
1622  *
1623  * @param priv
1624  *   Pointer to private structure.
1625  * @param parser
1626  *   Internal parser structure.
1627  * @param flow
1628  *   Pointer to the rte_flow.
1629  * @param[out] error
1630  *   Perform verbose error reporting if not NULL.
1631  *
1632  * @return
1633  *   0 on success, errno value on failure.
1634  */
1635 static int
1636 priv_flow_create_action_queue_drop(struct priv *priv,
1637                                    struct mlx5_flow_parse *parser,
1638                                    struct rte_flow *flow,
1639                                    struct rte_flow_error *error)
1640 {
1641         struct ibv_flow_spec_action_drop *drop;
1642         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1643         int err = 0;
1644
1645         assert(priv->pd);
1646         assert(priv->ctx);
1647         flow->drop = 1;
1648         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1649                         parser->queue[HASH_RXQ_ETH].offset);
1650         *drop = (struct ibv_flow_spec_action_drop){
1651                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1652                         .size = size,
1653         };
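             /* Account for the appended drop specification. */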
1654         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1655         parser->queue[HASH_RXQ_ETH].offset += size;
1656         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1657                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1658         if (parser->count)
1659                 flow->cs = parser->cs;
1660         if (!priv->dev->data->dev_started)
1661                 return 0;
1662         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1663         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1664                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1665                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1666         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1667                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1668                                    NULL, "flow rule creation failure");
1669                 err = ENOMEM;
1670                 goto error;
1671         }
1672         return 0;
1673 error:
1674         assert(flow);
1675         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1676                 claim_zero(mlx5_glue->destroy_flow
1677                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1678                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1679         }
1680         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1681                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1682                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1683         }
1684         if (flow->cs) {
1685                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1686                 flow->cs = NULL;
1687                 parser->cs = NULL;
1688         }
1689         return err;
1690 }
1691
1692 /**
1693  * Create hash Rx queues when RSS is enabled.
1694  *
1695  * @param priv
1696  *   Pointer to private structure.
1697  * @param parser
1698  *   Internal parser structure.
1699  * @param flow
1700  *   Pointer to the rte_flow.
1701  * @param[out] error
1702  *   Perform verbose error reporting if not NULL.
1703  *
1704  * @return
1705  *   0 on success, an errno value otherwise and rte_errno is set.
1706  */
1707 static int
1708 priv_flow_create_action_queue_rss(struct priv *priv,
1709                                   struct mlx5_flow_parse *parser,
1710                                   struct rte_flow *flow,
1711                                   struct rte_flow_error *error)
1712 {
1713         unsigned int i;
1714
1715         for (i = 0; i != hash_rxq_init_n; ++i) {
1716                 uint64_t hash_fields;
1717
1718                 if (!parser->queue[i].ibv_attr)
1719                         continue;
1720                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1721                 parser->queue[i].ibv_attr = NULL;
1722                 hash_fields = hash_rxq_init[i].hash_fields;
1723                 if (!priv->dev->data->dev_started)
1724                         continue;
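                     /* Reuse a matching hash Rx queue if one already exists. */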
1725                 flow->frxq[i].hrxq =
1726                         mlx5_priv_hrxq_get(priv,
1727                                            parser->rss_conf.rss_key,
1728                                            parser->rss_conf.rss_key_len,
1729                                            hash_fields,
1730                                            parser->queues,
1731                                            parser->queues_n);
1732                 if (flow->frxq[i].hrxq)
1733                         continue;
1734                 flow->frxq[i].hrxq =
1735                         mlx5_priv_hrxq_new(priv,
1736                                            parser->rss_conf.rss_key,
1737                                            parser->rss_conf.rss_key_len,
1738                                            hash_fields,
1739                                            parser->queues,
1740                                            parser->queues_n);
1741                 if (!flow->frxq[i].hrxq) {
1742                         rte_flow_error_set(error, ENOMEM,
1743                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1744                                            NULL, "cannot create hash rxq");
1745                         return ENOMEM;
1746                 }
1747         }
1748         return 0;
1749 }
1750
1751 /**
1752  * Complete flow rule creation.
1753  *
1754  * @param priv
1755  *   Pointer to private structure.
1756  * @param parser
1757  *   Internal parser structure.
1758  * @param flow
1759  *   Pointer to the rte_flow.
1760  * @param[out] error
1761  *   Perform verbose error reporting if not NULL.
1762  *
1763  * @return
1764  *   0 on success, an errno value otherwise and rte_errno is set.
1765  */
1766 static int
1767 priv_flow_create_action_queue(struct priv *priv,
1768                               struct mlx5_flow_parse *parser,
1769                               struct rte_flow *flow,
1770                               struct rte_flow_error *error)
1771 {
1772         int err = 0;
1773         unsigned int i;
1774
1775         assert(priv->pd);
1776         assert(priv->ctx);
1777         assert(!parser->drop);
1778         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1779         if (err)
1780                 goto error;
1781         if (parser->count)
1782                 flow->cs = parser->cs;
1783         if (!priv->dev->data->dev_started)
1784                 return 0;
1785         for (i = 0; i != hash_rxq_init_n; ++i) {
1786                 if (!flow->frxq[i].hrxq)
1787                         continue;
1788                 flow->frxq[i].ibv_flow =
1789                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1790                                                flow->frxq[i].ibv_attr);
1791                 if (!flow->frxq[i].ibv_flow) {
1792                         rte_flow_error_set(error, ENOMEM,
1793                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1794                                            NULL, "flow rule creation failure");
1795                         err = ENOMEM;
1796                         goto error;
1797                 }
1798                 DEBUG("%p type %d QP %p ibv_flow %p",
1799                       (void *)flow, i,
1800                       (void *)flow->frxq[i].hrxq,
1801                       (void *)flow->frxq[i].ibv_flow);
1802         }
1803         for (i = 0; i != parser->queues_n; ++i) {
1804                 struct mlx5_rxq_data *q =
1805                         (*priv->rxqs)[parser->queues[i]];
1806
1807                 q->mark |= parser->mark;
1808         }
1809         return 0;
1810 error:
1811         assert(flow);
1812         for (i = 0; i != hash_rxq_init_n; ++i) {
1813                 if (flow->frxq[i].ibv_flow) {
1814                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1815
1816                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1817                 }
1818                 if (flow->frxq[i].hrxq)
1819                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1820                 if (flow->frxq[i].ibv_attr)
1821                         rte_free(flow->frxq[i].ibv_attr);
1822         }
1823         if (flow->cs) {
1824                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1825                 flow->cs = NULL;
1826                 parser->cs = NULL;
1827         }
1828         return err;
1829 }
1830
1831 /**
1832  * Convert a flow.
1833  *
1834  * @param priv
1835  *   Pointer to private structure.
1836  * @param list
1837  *   Pointer to a TAILQ flow list.
1838  * @param[in] attr
1839  *   Flow rule attributes.
1840  * @param[in] pattern
1841  *   Pattern specification (list terminated by the END pattern item).
1842  * @param[in] actions
1843  *   Associated actions (list terminated by the END action).
1844  * @param[out] error
1845  *   Perform verbose error reporting if not NULL.
1846  *
1847  * @return
1848  *   A flow on success, NULL otherwise.
1849  */
1850 static struct rte_flow *
1851 priv_flow_create(struct priv *priv,
1852                  struct mlx5_flows *list,
1853                  const struct rte_flow_attr *attr,
1854                  const struct rte_flow_item items[],
1855                  const struct rte_flow_action actions[],
1856                  struct rte_flow_error *error)
1857 {
1858         struct mlx5_flow_parse parser = { .create = 1, };
1859         struct rte_flow *flow = NULL;
1860         unsigned int i;
1861         int err;
1862
1863         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1864         if (err)
1865                 goto exit;
1866         flow = rte_calloc(__func__, 1,
1867                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1868                           0);
1869         if (!flow) {
1870                 rte_flow_error_set(error, ENOMEM,
1871                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1872                                    NULL,
1873                                    "cannot allocate flow memory");
1874                 return NULL;
1875         }
1876         /* Copy queues configuration. */
1877         flow->queues = (uint16_t (*)[])(flow + 1);
1878         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1879         flow->queues_n = parser.queues_n;
1880         flow->mark = parser.mark;
1881         /* Copy RSS configuration. */
1882         flow->rss_conf = parser.rss_conf;
1883         flow->rss_conf.rss_key = flow->rss_key;
1884         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1885         /* Finalise the flow. */
1886         if (parser.drop)
1887                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1888                                                          error);
1889         else
1890                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1891         if (err)
1892                 goto exit;
1893         TAILQ_INSERT_TAIL(list, flow, next);
1894         DEBUG("Flow created %p", (void *)flow);
1895         return flow;
1896 exit:
1897         for (i = 0; i != hash_rxq_init_n; ++i) {
1898                 if (parser.queue[i].ibv_attr)
1899                         rte_free(parser.queue[i].ibv_attr);
1900         }
1901         rte_free(flow);
1902         return NULL;
1903 }
1904
1905 /**
1906  * Validate a flow supported by the NIC.
1907  *
1908  * @see rte_flow_validate()
1909  * @see rte_flow_ops
1910  */
1911 int
1912 mlx5_flow_validate(struct rte_eth_dev *dev,
1913                    const struct rte_flow_attr *attr,
1914                    const struct rte_flow_item items[],
1915                    const struct rte_flow_action actions[],
1916                    struct rte_flow_error *error)
1917 {
1918         struct priv *priv = dev->data->dev_private;
1919         int ret;
1920         struct mlx5_flow_parse parser = { .create = 0, };
1921
1922         priv_lock(priv);
1923         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1924         priv_unlock(priv);
1925         return ret;
1926 }
1927
1928 /**
1929  * Create a flow.
1930  *
1931  * @see rte_flow_create()
1932  * @see rte_flow_ops
1933  */
1934 struct rte_flow *
1935 mlx5_flow_create(struct rte_eth_dev *dev,
1936                  const struct rte_flow_attr *attr,
1937                  const struct rte_flow_item items[],
1938                  const struct rte_flow_action actions[],
1939                  struct rte_flow_error *error)
1940 {
1941         struct priv *priv = dev->data->dev_private;
1942         struct rte_flow *flow;
1943
1944         priv_lock(priv);
1945         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1946                                 error);
1947         priv_unlock(priv);
1948         return flow;
1949 }
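
     /*
      * Illustrative example (not part of the driver): a rule such as the
      * following reaches mlx5_flow_create() through the rte_flow API.
      *
      *     struct rte_flow_attr attr = { .ingress = 1 };
      *     struct rte_flow_item pattern[] = {
      *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
      *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
      *             { .type = RTE_FLOW_ITEM_TYPE_END },
      *     };
      *     struct rte_flow_action_queue queue = { .index = 0 };
      *     struct rte_flow_action actions[] = {
      *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
      *             { .type = RTE_FLOW_ACTION_TYPE_END },
      *     };
      *     struct rte_flow_error err;
      *     struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
      *                                             actions, &err);
      *
      * "port_id" is assumed to be a valid mlx5 port identifier.
      */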
1950
1951 /**
1952  * Destroy a flow.
1953  *
1954  * @param priv
1955  *   Pointer to private structure.
1956  * @param list
1957  *   Pointer to a TAILQ flow list.
1958  * @param[in] flow
1959  *   Flow to destroy.
1960  */
1961 static void
1962 priv_flow_destroy(struct priv *priv,
1963                   struct mlx5_flows *list,
1964                   struct rte_flow *flow)
1965 {
1966         unsigned int i;
1967
1968         if (flow->drop || !flow->mark)
1969                 goto free;
1970         for (i = 0; i != flow->queues_n; ++i) {
1971                 struct rte_flow *tmp;
1972                 int mark = 0;
1973
1974                 /*
1975                  * To remove the mark from the queue, the queue must not be
1976                  * present in any other marked flow (RSS or not).
1977                  */
1978                 TAILQ_FOREACH(tmp, list, next) {
1979                         unsigned int j;
1980                         uint16_t *tqs = NULL;
1981                         uint16_t tq_n = 0;
1982
1983                         if (!tmp->mark)
1984                                 continue;
1985                         for (j = 0; j != hash_rxq_init_n; ++j) {
1986                                 if (!tmp->frxq[j].hrxq)
1987                                         continue;
1988                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1989                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1990                         }
1991                         if (!tq_n)
1992                                 continue;
1993                         for (j = 0; (j != tq_n) && !mark; j++)
1994                                 if (tqs[j] == (*flow->queues)[i])
1995                                         mark = 1;
1996                 }
1997                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1998         }
1999 free:
2000         if (flow->drop) {
2001                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2002                         claim_zero(mlx5_glue->destroy_flow
2003                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2004                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2005         } else {
2006                 for (i = 0; i != hash_rxq_init_n; ++i) {
2007                         struct mlx5_flow *frxq = &flow->frxq[i];
2008
2009                         if (frxq->ibv_flow)
2010                                 claim_zero(mlx5_glue->destroy_flow
2011                                            (frxq->ibv_flow));
2012                         if (frxq->hrxq)
2013                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2014                         if (frxq->ibv_attr)
2015                                 rte_free(frxq->ibv_attr);
2016                 }
2017         }
2018         if (flow->cs) {
2019                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2020                 flow->cs = NULL;
2021         }
2022         TAILQ_REMOVE(list, flow, next);
2023         DEBUG("Flow destroyed %p", (void *)flow);
2024         rte_free(flow);
2025 }
2026
2027 /**
2028  * Destroy all flows.
2029  *
2030  * @param priv
2031  *   Pointer to private structure.
2032  * @param list
2033  *   Pointer to a TAILQ flow list.
2034  */
2035 void
2036 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2037 {
2038         while (!TAILQ_EMPTY(list)) {
2039                 struct rte_flow *flow;
2040
2041                 flow = TAILQ_FIRST(list);
2042                 priv_flow_destroy(priv, list, flow);
2043         }
2044 }
2045
2046 /**
2047  * Create drop queue.
2048  *
2049  * @param priv
2050  *   Pointer to private structure.
2051  *
2052  * @return
2053  *   0 on success, -1 on failure.
2054  */
2055 int
2056 priv_flow_create_drop_queue(struct priv *priv)
2057 {
2058         struct mlx5_hrxq_drop *fdq = NULL;
2059
2060         assert(priv->pd);
2061         assert(priv->ctx);
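             /*
              * The drop queue chains minimal Verbs objects: a CQ, a
              * single-entry work queue, an indirection table pointing to it
              * and a hashed QP. Flows with a drop action are attached to
              * this QP so that matching packets are discarded.
              */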
2062         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2063         if (!fdq) {
2064                 WARN("cannot allocate memory for drop queue");
2065                 goto error;
2066         }
2067         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2068         if (!fdq->cq) {
2069                 WARN("cannot allocate CQ for drop queue");
2070                 goto error;
2071         }
2072         fdq->wq = mlx5_glue->create_wq
2073                 (priv->ctx,
2074                  &(struct ibv_wq_init_attr){
2075                         .wq_type = IBV_WQT_RQ,
2076                         .max_wr = 1,
2077                         .max_sge = 1,
2078                         .pd = priv->pd,
2079                         .cq = fdq->cq,
2080                  });
2081         if (!fdq->wq) {
2082                 WARN("cannot allocate WQ for drop queue");
2083                 goto error;
2084         }
2085         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2086                 (priv->ctx,
2087                  &(struct ibv_rwq_ind_table_init_attr){
2088                         .log_ind_tbl_size = 0,
2089                         .ind_tbl = &fdq->wq,
2090                         .comp_mask = 0,
2091                  });
2092         if (!fdq->ind_table) {
2093                 WARN("cannot allocate indirection table for drop queue");
2094                 goto error;
2095         }
2096         fdq->qp = mlx5_glue->create_qp_ex
2097                 (priv->ctx,
2098                  &(struct ibv_qp_init_attr_ex){
2099                         .qp_type = IBV_QPT_RAW_PACKET,
2100                         .comp_mask =
2101                                 IBV_QP_INIT_ATTR_PD |
2102                                 IBV_QP_INIT_ATTR_IND_TABLE |
2103                                 IBV_QP_INIT_ATTR_RX_HASH,
2104                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2105                                 .rx_hash_function =
2106                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2107                                 .rx_hash_key_len = rss_hash_default_key_len,
2108                                 .rx_hash_key = rss_hash_default_key,
2109                                 .rx_hash_fields_mask = 0,
2110                                 },
2111                         .rwq_ind_tbl = fdq->ind_table,
2112                         .pd = priv->pd
2113                  });
2114         if (!fdq->qp) {
2115                 WARN("cannot allocate QP for drop queue");
2116                 goto error;
2117         }
2118         priv->flow_drop_queue = fdq;
2119         return 0;
2120 error:
2121         if (fdq && fdq->qp)
2122                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2123         if (fdq && fdq->ind_table)
2124                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2125         if (fdq && fdq->wq)
2126                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2127         if (fdq && fdq->cq)
2128                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2129         if (fdq)
2130                 rte_free(fdq);
2131         priv->flow_drop_queue = NULL;
2132         return -1;
2133 }
2134
2135 /**
2136  * Delete drop queue.
2137  *
2138  * @param priv
2139  *   Pointer to private structure.
2140  */
2141 void
2142 priv_flow_delete_drop_queue(struct priv *priv)
2143 {
2144         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2145
2146         if (!fdq)
2147                 return;
2148         if (fdq->qp)
2149                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2150         if (fdq->ind_table)
2151                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2152         if (fdq->wq)
2153                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2154         if (fdq->cq)
2155                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2156         rte_free(fdq);
2157         priv->flow_drop_queue = NULL;
2158 }
2159
2160 /**
2161  * Remove all flows.
2162  *
2163  * @param priv
2164  *   Pointer to private structure.
2165  * @param list
2166  *   Pointer to a TAILQ flow list.
2167  */
2168 void
2169 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2170 {
2171         struct rte_flow *flow;
2172
2173         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2174                 unsigned int i;
2175                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2176
2177                 if (flow->drop) {
2178                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2179                                 continue;
2180                         claim_zero(mlx5_glue->destroy_flow
2181                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2182                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2183                         DEBUG("Flow %p removed", (void *)flow);
2184                         /* Next flow. */
2185                         continue;
2186                 }
2187                 /* Verify the flow has not already been cleaned. */
2188                 for (i = 0; i != hash_rxq_init_n; ++i) {
2189                         if (!flow->frxq[i].ibv_flow)
2190                                 continue;
2191                         /*
2192                          * The indirection table may be needed to clear the
2193                          * mark flag on the Rx queues.
2194                          * Retrieving it here avoids another loop over the
2195                          * hash Rx queue types.
2196                          */
2197                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2198                         break;
2199                 }
2200                 if (i == hash_rxq_init_n)
2201                         return;
2202                 if (flow->mark) {
2203                         assert(ind_tbl);
2204                         for (i = 0; i != ind_tbl->queues_n; ++i)
2205                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2206                 }
2207                 for (i = 0; i != hash_rxq_init_n; ++i) {
2208                         if (!flow->frxq[i].ibv_flow)
2209                                 continue;
2210                         claim_zero(mlx5_glue->destroy_flow
2211                                    (flow->frxq[i].ibv_flow));
2212                         flow->frxq[i].ibv_flow = NULL;
2213                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2214                         flow->frxq[i].hrxq = NULL;
2215                 }
2216                 DEBUG("Flow %p removed", (void *)flow);
2217         }
2218 }
2219
2220 /**
2221  * Add all flows.
2222  *
2223  * @param priv
2224  *   Pointer to private structure.
2225  * @param list
2226  *   Pointer to a TAILQ flow list.
2227  *
2228  * @return
2229  *   0 on success, an errno value otherwise and rte_errno is set.
2230  */
2231 int
2232 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2233 {
2234         struct rte_flow *flow;
2235
2236         TAILQ_FOREACH(flow, list, next) {
2237                 unsigned int i;
2238
2239                 if (flow->drop) {
2240                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2241                                 mlx5_glue->create_flow
2242                                 (priv->flow_drop_queue->qp,
2243                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2244                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2245                                 DEBUG("Flow %p cannot be applied",
2246                                       (void *)flow);
2247                                 rte_errno = EINVAL;
2248                                 return rte_errno;
2249                         }
2250                         DEBUG("Flow %p applied", (void *)flow);
2251                         /* Next flow. */
2252                         continue;
2253                 }
2254                 for (i = 0; i != hash_rxq_init_n; ++i) {
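                     /* Re-create the hash Rx queues and Verbs flows. */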
2255                         if (!flow->frxq[i].ibv_attr)
2256                                 continue;
2257                         flow->frxq[i].hrxq =
2258                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2259                                                    flow->rss_conf.rss_key_len,
2260                                                    hash_rxq_init[i].hash_fields,
2261                                                    (*flow->queues),
2262                                                    flow->queues_n);
2263                         if (flow->frxq[i].hrxq)
2264                                 goto flow_create;
2265                         flow->frxq[i].hrxq =
2266                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2267                                                    flow->rss_conf.rss_key_len,
2268                                                    hash_rxq_init[i].hash_fields,
2269                                                    (*flow->queues),
2270                                                    flow->queues_n);
2271                         if (!flow->frxq[i].hrxq) {
2272                                 DEBUG("Flow %p cannot be applied",
2273                                       (void *)flow);
2274                                 rte_errno = EINVAL;
2275                                 return rte_errno;
2276                         }
2277 flow_create:
2278                         flow->frxq[i].ibv_flow =
2279                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2280                                                        flow->frxq[i].ibv_attr);
2281                         if (!flow->frxq[i].ibv_flow) {
2282                                 DEBUG("Flow %p cannot be applied",
2283                                       (void *)flow);
2284                                 rte_errno = EINVAL;
2285                                 return rte_errno;
2286                         }
2287                         DEBUG("Flow %p applied", (void *)flow);
2288                 }
2289                 if (!flow->mark)
2290                         continue;
2291                 for (i = 0; i != flow->queues_n; ++i)
2292                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2293         }
2294         return 0;
2295 }
2296
2297 /**
2298  * Verify the flow list is empty.
2299  *
2300  * @param priv
2301  *   Pointer to private structure.
2302  *
2303  * @return
      *   The number of flows not released.
2304  */
2305 int
2306 priv_flow_verify(struct priv *priv)
2307 {
2308         struct rte_flow *flow;
2309         int ret = 0;
2310
2311         TAILQ_FOREACH(flow, &priv->flows, next) {
2312                 DEBUG("%p: flow %p still referenced", (void *)priv,
2313                       (void *)flow);
2314                 ++ret;
2315         }
2316         return ret;
2317 }
2318
2319 /**
2320  * Enable a control flow configured from the control plane.
2321  *
2322  * @param dev
2323  *   Pointer to Ethernet device.
2324  * @param eth_spec
2325  *   An Ethernet flow spec to apply.
2326  * @param eth_mask
2327  *   An Ethernet flow mask to apply.
2328  * @param vlan_spec
2329  *   A VLAN flow spec to apply.
2330  * @param vlan_mask
2331  *   A VLAN flow mask to apply.
2332  *
2333  * @return
2334  *   0 on success.
2335  *   0 on success, an errno value on failure.
2336 int
2337 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2338                     struct rte_flow_item_eth *eth_spec,
2339                     struct rte_flow_item_eth *eth_mask,
2340                     struct rte_flow_item_vlan *vlan_spec,
2341                     struct rte_flow_item_vlan *vlan_mask)
2342 {
2343         struct priv *priv = dev->data->dev_private;
2344         const struct rte_flow_attr attr = {
2345                 .ingress = 1,
2346                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2347         };
2348         struct rte_flow_item items[] = {
2349                 {
2350                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2351                         .spec = eth_spec,
2352                         .last = NULL,
2353                         .mask = eth_mask,
2354                 },
2355                 {
2356                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2357                                 RTE_FLOW_ITEM_TYPE_END,
2358                         .spec = vlan_spec,
2359                         .last = NULL,
2360                         .mask = vlan_mask,
2361                 },
2362                 {
2363                         .type = RTE_FLOW_ITEM_TYPE_END,
2364                 },
2365         };
2366         struct rte_flow_action actions[] = {
2367                 {
2368                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2369                 },
2370                 {
2371                         .type = RTE_FLOW_ACTION_TYPE_END,
2372                 },
2373         };
2374         struct rte_flow *flow;
2375         struct rte_flow_error error;
2376         unsigned int i;
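             /*
              * The "local" layout mirrors struct rte_flow_action_rss and
              * provides stack storage for its flexible queue array.
              */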
2377         union {
2378                 struct rte_flow_action_rss rss;
2379                 struct {
2380                         const struct rte_eth_rss_conf *rss_conf;
2381                         uint16_t num;
2382                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2383                 } local;
2384         } action_rss;
2385
2386         if (!priv->reta_idx_n)
2387                 return EINVAL;
2388         for (i = 0; i != priv->reta_idx_n; ++i)
2389                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2390         action_rss.local.rss_conf = &priv->rss_conf;
2391         action_rss.local.num = priv->reta_idx_n;
2392         actions[0].conf = (const void *)&action_rss.rss;
2393         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2394                                 &error);
2395         if (!flow)
2396                 return rte_errno;
2397         return 0;
2398 }
2399
2400 /**
2401  * Enable a control flow configured from the control plane.
2402  *
2403  * @param dev
2404  *   Pointer to Ethernet device.
2405  * @param eth_spec
2406  *   An Ethernet flow spec to apply.
2407  * @param eth_mask
2408  *   An Ethernet flow mask to apply.
2409  *
2410  * @return
2411  *   0 on success, an errno value on failure.
2412  */
2413 int
2414 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2415                struct rte_flow_item_eth *eth_spec,
2416                struct rte_flow_item_eth *eth_mask)
2417 {
2418         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2419 }
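
     /*
      * Illustrative example (assumed caller, not from this file): broadcast
      * reception can be enabled with a spec and mask matching the broadcast
      * address.
      *
      *     struct rte_flow_item_eth bcast = {
      *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
      *     };
      *
      *     mlx5_ctrl_flow(dev, &bcast, &bcast);
      */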
2420
2421 /**
2422  * Destroy a flow.
2423  *
2424  * @see rte_flow_destroy()
2425  * @see rte_flow_ops
2426  */
2427 int
2428 mlx5_flow_destroy(struct rte_eth_dev *dev,
2429                   struct rte_flow *flow,
2430                   struct rte_flow_error *error)
2431 {
2432         struct priv *priv = dev->data->dev_private;
2433
2434         (void)error;
2435         priv_lock(priv);
2436         priv_flow_destroy(priv, &priv->flows, flow);
2437         priv_unlock(priv);
2438         return 0;
2439 }
2440
2441 /**
2442  * Destroy all flows.
2443  *
2444  * @see rte_flow_flush()
2445  * @see rte_flow_ops
2446  */
2447 int
2448 mlx5_flow_flush(struct rte_eth_dev *dev,
2449                 struct rte_flow_error *error)
2450 {
2451         struct priv *priv = dev->data->dev_private;
2452
2453         (void)error;
2454         priv_lock(priv);
2455         priv_flow_flush(priv, &priv->flows);
2456         priv_unlock(priv);
2457         return 0;
2458 }
2459
2460 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2461 /**
2462  * Query flow counter.
2463  *
2464  * @param cs
2465  *   The counter set.
2466  * @param counter_stats
2467  *   Accumulated statistics, used as a baseline and updated on reset.
      * @param query_count
      *   Query structure filled with the counter values since the last reset.
      * @param error
      *   Perform verbose error reporting if not NULL.
2468  *
2469  * @return
2470  *   0 on success, an errno value otherwise and rte_errno is set.
2471  */
2472 static int
2473 priv_flow_query_count(struct ibv_counter_set *cs,
2474                       struct mlx5_flow_counter_stats *counter_stats,
2475                       struct rte_flow_query_count *query_count,
2476                       struct rte_flow_error *error)
2477 {
2478         uint64_t counters[2];
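             /* counters[0] returns hits, counters[1] returns bytes. */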
2479         struct ibv_query_counter_set_attr query_cs_attr = {
2480                 .cs = cs,
2481                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2482         };
2483         struct ibv_counter_set_data query_out = {
2484                 .out = counters,
2485                 .outlen = 2 * sizeof(uint64_t),
2486         };
2487         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2488
2489         if (res) {
2490                 rte_flow_error_set(error, -res,
2491                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2492                                    NULL,
2493                                    "cannot read counter");
2494                 return -res;
2495         }
2496         query_count->hits_set = 1;
2497         query_count->bytes_set = 1;
2498         query_count->hits = counters[0] - counter_stats->hits;
2499         query_count->bytes = counters[1] - counter_stats->bytes;
2500         if (query_count->reset) {
2501                 counter_stats->hits = counters[0];
2502                 counter_stats->bytes = counters[1];
2503         }
2504         return 0;
2505 }
2506
2507 /**
2508  * Query a flow.
2509  *
2510  * @see rte_flow_query()
2511  * @see rte_flow_ops
2512  */
2513 int
2514 mlx5_flow_query(struct rte_eth_dev *dev,
2515                 struct rte_flow *flow,
2516                 enum rte_flow_action_type action __rte_unused,
2517                 void *data,
2518                 struct rte_flow_error *error)
2519 {
2520         struct priv *priv = dev->data->dev_private;
2521         int res = EINVAL;
2522
2523         priv_lock(priv);
2524         if (flow->cs) {
2525                 res = priv_flow_query_count(flow->cs,
2526                                         &flow->counter_stats,
2527                                         (struct rte_flow_query_count *)data,
2528                                         error);
2529         } else {
2530                 rte_flow_error_set(error, res,
2531                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2532                                    NULL,
2533                                    "no counter found for flow");
2534         }
2535         priv_unlock(priv);
2536         return -res;
2537 }
2538 #endif
2539
2540 /**
2541  * Enable or disable isolated mode.
2542  *
2543  * @see rte_flow_isolate()
2544  * @see rte_flow_ops
2545  */
2546 int
2547 mlx5_flow_isolate(struct rte_eth_dev *dev,
2548                   int enable,
2549                   struct rte_flow_error *error)
2550 {
2551         struct priv *priv = dev->data->dev_private;
2552
2553         priv_lock(priv);
2554         if (dev->data->dev_started) {
2555                 rte_flow_error_set(error, EBUSY,
2556                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2557                                    NULL,
2558                                    "port must be stopped first");
2559                 priv_unlock(priv);
2560                 return -rte_errno;
2561         }
2562         priv->isolated = !!enable;
2563         if (enable)
2564                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2565         else
2566                 priv->dev->dev_ops = &mlx5_dev_ops;
2567         priv_unlock(priv);
2568         return 0;
2569 }
2570
2571 /**
2572  * Convert a flow director filter to a generic flow.
2573  *
2574  * @param priv
2575  *   Private structure.
2576  * @param fdir_filter
2577  *   Flow director filter to add.
2578  * @param attributes
2579  *   Generic flow parameters structure.
2580  *
2581  * @return
2582  *   0 on success, errno value on error.
2583  */
2584 static int
2585 priv_fdir_filter_convert(struct priv *priv,
2586                          const struct rte_eth_fdir_filter *fdir_filter,
2587                          struct mlx5_fdir *attributes)
2588 {
2589         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2590
2591         /* Validate queue number. */
2592         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2593                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2594                 return EINVAL;
2595         }
2596         attributes->attr.ingress = 1;
2597         attributes->items[0] = (struct rte_flow_item) {
2598                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2599                 .spec = &attributes->l2,
2600                 .mask = &attributes->l2_mask,
2601         };
2602         switch (fdir_filter->action.behavior) {
2603         case RTE_ETH_FDIR_ACCEPT:
2604                 attributes->actions[0] = (struct rte_flow_action){
2605                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2606                         .conf = &attributes->queue,
2607                 };
2608                 break;
2609         case RTE_ETH_FDIR_REJECT:
2610                 attributes->actions[0] = (struct rte_flow_action){
2611                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2612                 };
2613                 break;
2614         default:
2615                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2616                 return ENOTSUP;
2617         }
2618         attributes->queue.index = fdir_filter->action.rx_queue;
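             /*
              * items[1] (L3) and, when applicable, items[2] (L4) are filled
              * below according to the flow director flow type.
              */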
2619         switch (fdir_filter->input.flow_type) {
2620         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2621                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2622                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2623                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2624                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2625                         .type_of_service = input->flow.udp4_flow.ip.tos,
2626                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2627                 };
2628                 attributes->l4.udp.hdr = (struct udp_hdr){
2629                         .src_port = input->flow.udp4_flow.src_port,
2630                         .dst_port = input->flow.udp4_flow.dst_port,
2631                 };
2632                 attributes->items[1] = (struct rte_flow_item){
2633                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2634                         .spec = &attributes->l3,
2635                         .mask = &attributes->l3,
2636                 };
2637                 attributes->items[2] = (struct rte_flow_item){
2638                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2639                         .spec = &attributes->l4,
2640                         .mask = &attributes->l4,
2641                 };
2642                 break;
2643         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2644                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2645                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2646                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2647                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2648                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2649                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2650                 };
2651                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2652                         .src_port = input->flow.tcp4_flow.src_port,
2653                         .dst_port = input->flow.tcp4_flow.dst_port,
2654                 };
2655                 attributes->items[1] = (struct rte_flow_item){
2656                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2657                         .spec = &attributes->l3,
2658                         .mask = &attributes->l3,
2659                 };
2660                 attributes->items[2] = (struct rte_flow_item){
2661                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2662                         .spec = &attributes->l4,
2663                         .mask = &attributes->l4,
2664                 };
2665                 break;
2666         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2667                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2668                         .src_addr = input->flow.ip4_flow.src_ip,
2669                         .dst_addr = input->flow.ip4_flow.dst_ip,
2670                         .time_to_live = input->flow.ip4_flow.ttl,
2671                         .type_of_service = input->flow.ip4_flow.tos,
2672                         .next_proto_id = input->flow.ip4_flow.proto,
2673                 };
2674                 attributes->items[1] = (struct rte_flow_item){
2675                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2676                         .spec = &attributes->l3,
2677                         .mask = &attributes->l3,
2678                 };
2679                 break;
2680         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2681                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2682                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2683                         .proto = input->flow.udp6_flow.ip.proto,
2684                 };
2685                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2686                        input->flow.udp6_flow.ip.src_ip,
2687                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2688                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2689                        input->flow.udp6_flow.ip.dst_ip,
2690                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2691                 attributes->l4.udp.hdr = (struct udp_hdr){
2692                         .src_port = input->flow.udp6_flow.src_port,
2693                         .dst_port = input->flow.udp6_flow.dst_port,
2694                 };
2695                 attributes->items[1] = (struct rte_flow_item){
2696                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2697                         .spec = &attributes->l3,
2698                         .mask = &attributes->l3,
2699                 };
2700                 attributes->items[2] = (struct rte_flow_item){
2701                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2702                         .spec = &attributes->l4,
2703                         .mask = &attributes->l4,
2704                 };
2705                 break;
2706         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2707                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2708                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2709                         .proto = input->flow.tcp6_flow.ip.proto,
2710                 };
2711                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2712                        input->flow.tcp6_flow.ip.src_ip,
2713                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2714                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2715                        input->flow.tcp6_flow.ip.dst_ip,
2716                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2717                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2718                         .src_port = input->flow.tcp6_flow.src_port,
2719                         .dst_port = input->flow.tcp6_flow.dst_port,
2720                 };
2721                 attributes->items[1] = (struct rte_flow_item){
2722                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2723                         .spec = &attributes->l3,
2724                         .mask = &attributes->l3,
2725                 };
2726                 attributes->items[2] = (struct rte_flow_item){
2727                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2728                         .spec = &attributes->l4,
2729                         .mask = &attributes->l4,
2730                 };
2731                 break;
2732         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2733                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2734                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2735                         .proto = input->flow.ipv6_flow.proto,
2736                 };
2737                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2738                        input->flow.ipv6_flow.src_ip,
2739                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2740                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2741                        input->flow.ipv6_flow.dst_ip,
2742                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2743                 attributes->items[1] = (struct rte_flow_item){
2744                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2745                         .spec = &attributes->l3,
2746                         .mask = &attributes->l3,
2747                 };
2748                 break;
2749         default:
2750                 ERROR("invalid flow type %d",
2751                       fdir_filter->input.flow_type);
2752                 return ENOTSUP;
2753         }
2754         return 0;
2755 }
2756
2757 /**
2758  * Add a new flow director filter and store it in the list.
2759  *
2760  * @param priv
2761  *   Private structure.
2762  * @param fdir_filter
2763  *   Flow director filter to add.
2764  *
2765  * @return
2766  *   0 on success, errno value on failure.
2767  */
2768 static int
2769 priv_fdir_filter_add(struct priv *priv,
2770                      const struct rte_eth_fdir_filter *fdir_filter)
2771 {
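            /*
             * An all-zero Ethernet mask: flow director does not match on L2
             * fields, only on the L3/L4 items filled in by
             * priv_fdir_filter_convert().
             */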
2772         struct mlx5_fdir attributes = {
2773                 .attr.group = 0,
2774                 .l2_mask = {
2775                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2776                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2777                         .type = 0,
2778                 },
2779         };
2780         struct mlx5_flow_parse parser = {
2781                 .layer = HASH_RXQ_ETH,
2782         };
2783         struct rte_flow_error error;
2784         struct rte_flow *flow;
2785         int ret;
2786
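            /*
             * Convert the flow director filter into a generic rte_flow
             * description, run it through the flow parser for validation,
             * then create it like any other rte_flow rule.
             */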
2787         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2788         if (ret)
2789                 return -ret;
2790         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2791                                 attributes.actions, &error, &parser);
2792         if (ret)
2793                 return -ret;
2794         flow = priv_flow_create(priv,
2795                                 &priv->flows,
2796                                 &attributes.attr,
2797                                 attributes.items,
2798                                 attributes.actions,
2799                                 &error);
2800         if (flow) {
2801                 DEBUG("FDIR created %p", (void *)flow);
2802                 return 0;
2803         }
2804         return ENOTSUP;
2805 }
2806
2807 /**
2808  * Delete a specific filter.
2809  *
2810  * @param priv
2811  *   Private structure.
2812  * @param fdir_filter
2813  *   Filter to be deleted.
2814  *
2815  * @return
2816  *   0 on success, errno value on failure.
2817  */
2818 static int
2819 priv_fdir_filter_delete(struct priv *priv,
2820                         const struct rte_eth_fdir_filter *fdir_filter)
2821 {
2822         struct mlx5_fdir attributes = {
2823                 .attr.group = 0,
2824         };
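            /*
             * Fully build the specifications (.create = 1) so the result can
             * be compared byte for byte with the flows already installed.
             */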
2825         struct mlx5_flow_parse parser = {
2826                 .create = 1,
2827                 .layer = HASH_RXQ_ETH,
2828         };
2829         struct rte_flow_error error;
2830         struct rte_flow *flow;
2831         unsigned int i;
2832         int ret;
2833
2834         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2835         if (ret)
2836                 return -ret;
2837         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2838                                 attributes.actions, &error, &parser);
2839         if (ret)
2840                 goto exit;
2841         /*
2842          * Special case for the drop action, which is only added to the
2843          * specifications when the flow is created.  Here it is missing, so
2844          * append it manually to allow the comparison below.
2845          */
2846         if (parser.drop) {
2847                 struct ibv_flow_spec_action_drop *drop;
2848
2849                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2850                                 parser.queue[HASH_RXQ_ETH].offset);
2851                 *drop = (struct ibv_flow_spec_action_drop){
2852                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2853                         .size = sizeof(struct ibv_flow_spec_action_drop),
2854                 };
2855                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2856         }
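            /*
             * Look for an installed flow whose Verbs attributes and
             * specifications match those generated from the filter being
             * deleted.
             */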
2857         TAILQ_FOREACH(flow, &priv->flows, next) {
2858                 struct ibv_flow_attr *attr;
2859                 struct ibv_spec_header *attr_h;
2860                 void *spec;
2861                 struct ibv_flow_attr *flow_attr;
2862                 struct ibv_spec_header *flow_h;
2863                 void *flow_spec;
2864                 unsigned int specs_n;
2865
2866                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2867                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2868                 /* Compare the attributes first. */
2869                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2870                         continue;
2871                 if (attr->num_of_specs == 0)
2872                         continue;
2873                 spec = (void *)((uintptr_t)attr +
2874                                 sizeof(struct ibv_flow_attr));
2875                 flow_spec = (void *)((uintptr_t)flow_attr +
2876                                      sizeof(struct ibv_flow_attr));
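                    /* Compare only the specifications present in both flows. */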
2877                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2878                 for (i = 0; i != specs_n; ++i) {
2879                         attr_h = spec;
2880                         flow_h = flow_spec;
2881                         if (memcmp(spec, flow_spec,
2882                                    RTE_MIN(attr_h->size, flow_h->size)))
2883                                 goto wrong_flow;
2884                         spec = (void *)((uintptr_t)spec + attr_h->size);
2885                         flow_spec = (void *)((uintptr_t)flow_spec +
2886                                              flow_h->size);
2887                 }
2888                 /* At this point, the flows match. */
2889                 break;
2890 wrong_flow:
2891                 /* The flow does not match. */
2892                 continue;
2893         }
2894         if (flow)
2895                 priv_flow_destroy(priv, &priv->flows, flow);
2896 exit:
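            /* Release the Verbs flow attributes allocated during conversion. */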
2897         for (i = 0; i != hash_rxq_init_n; ++i) {
2898                 if (parser.queue[i].ibv_attr)
2899                         rte_free(parser.queue[i].ibv_attr);
2900         }
2901         return -ret;
2902 }
2903
2904 /**
2905  * Update a specific filter (delete then re-add it).
2906  *
2907  * @param priv
2908  *   Private structure.
2909  * @param fdir_filter
2910  *   Filter to be updated.
2911  *
2912  * @return
2913  *   0 on success, errno value on failure.
2914  */
2915 static int
2916 priv_fdir_filter_update(struct priv *priv,
2917                         const struct rte_eth_fdir_filter *fdir_filter)
2918 {
2919         int ret;
2920
2921         ret = priv_fdir_filter_delete(priv, fdir_filter);
2922         if (ret)
2923                 return ret;
2924         ret = priv_fdir_filter_add(priv, fdir_filter);
2925         return ret;
2926 }
2927
2928 /**
2929  * Flush all filters.
2930  *
2931  * @param priv
2932  *   Private structure.
2933  */
2934 static void
2935 priv_fdir_filter_flush(struct priv *priv)
2936 {
2937         priv_flow_flush(priv, &priv->flows);
2938 }
2939
2940 /**
2941  * Get flow director information.
2942  *
2943  * @param priv
2944  *   Private structure.
2945  * @param[out] fdir_info
2946  *   Resulting flow director information.
2947  */
2948 static void
2949 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2950 {
2951         struct rte_eth_fdir_masks *mask =
2952                 &priv->dev->data->dev_conf.fdir_conf.mask;
2953
2954         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
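            /*
             * Flexible payload and guaranteed filter entries are not
             * supported, report the related capabilities as zero.
             */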
2955         fdir_info->guarant_spc = 0;
2956         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2957         fdir_info->max_flexpayload = 0;
2958         fdir_info->flow_types_mask[0] = 0;
2959         fdir_info->flex_payload_unit = 0;
2960         fdir_info->max_flex_payload_segment_num = 0;
2961         fdir_info->flex_payload_limit = 0;
2962         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2963 }
2964
2965 /**
2966  * Deal with flow director operations.
2967  *
2968  * @param priv
2969  *   Pointer to private structure.
2970  * @param filter_op
2971  *   Operation to perform.
2972  * @param arg
2973  *   Pointer to operation-specific structure.
2974  *
2975  * @return
2976  *   0 on success, errno value on failure.
2977  */
2978 static int
2979 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2980 {
2981         enum rte_fdir_mode fdir_mode =
2982                 priv->dev->data->dev_conf.fdir_conf.mode;
2983         int ret = 0;
2984
2985         if (filter_op == RTE_ETH_FILTER_NOP)
2986                 return 0;
2987         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2988             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2989                 ERROR("%p: flow director mode %d not supported",
2990                       (void *)priv, fdir_mode);
2991                 return EINVAL;
2992         }
2993         switch (filter_op) {
2994         case RTE_ETH_FILTER_ADD:
2995                 ret = priv_fdir_filter_add(priv, arg);
2996                 break;
2997         case RTE_ETH_FILTER_UPDATE:
2998                 ret = priv_fdir_filter_update(priv, arg);
2999                 break;
3000         case RTE_ETH_FILTER_DELETE:
3001                 ret = priv_fdir_filter_delete(priv, arg);
3002                 break;
3003         case RTE_ETH_FILTER_FLUSH:
3004                 priv_fdir_filter_flush(priv);
3005                 break;
3006         case RTE_ETH_FILTER_INFO:
3007                 priv_fdir_info_get(priv, arg);
3008                 break;
3009         default:
3010                 DEBUG("%p: unknown operation %u", (void *)priv,
3011                       filter_op);
3012                 ret = EINVAL;
3013                 break;
3014         }
3015         return ret;
3016 }
3017
3018 /**
3019  * Manage filter operations.
3020  *
3021  * @param dev
3022  *   Pointer to Ethernet device structure.
3023  * @param filter_type
3024  *   Filter type.
3025  * @param filter_op
3026  *   Operation to perform.
3027  * @param arg
3028  *   Pointer to operation-specific structure.
3029  *
3030  * @return
3031  *   0 on success, negative errno value on failure.
3032  */
3033 int
3034 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3035                      enum rte_filter_type filter_type,
3036                      enum rte_filter_op filter_op,
3037                      void *arg)
3038 {
3039         int ret = EINVAL;
3040         struct priv *priv = dev->data->dev_private;
3041
3042         switch (filter_type) {
3043         case RTE_ETH_FILTER_GENERIC:
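                    /*
                     * rte_flow API, the only supported operation is GET which
                     * exposes the flow operations structure.
                     */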
3044                 if (filter_op != RTE_ETH_FILTER_GET)
3045                         return -EINVAL;
3046                 *(const void **)arg = &mlx5_flow_ops;
3047                 return 0;
3048         case RTE_ETH_FILTER_FDIR:
3049                 priv_lock(priv);
3050                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3051                 priv_unlock(priv);
3052                 break;
3053         default:
3054                 ERROR("%p: filter type (%d) not supported",
3055                       (void *)dev, filter_type);
3056                 break;
3057         }
3058         return -ret;
3059 }