net/mlx5: fix flow RSS configuration
[dpdk.git] drivers/net/mlx5/mlx5_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
        int dummy;
};
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};
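
/*
 * Note: the enum order matters. Entries for a given IP version are
 * contiguous, from the most specific (TCP) to the least specific (plain
 * IP), with HASH_RXQ_ETH last. priv_flow_convert_finalise() below
 * relies on this layout when it walks contiguous ranges such as
 * HASH_RXQ_TCPV4..HASH_RXQ_IPV4.
 */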

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};
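
/*
 * Note: flow_priority is an offset added to the rte_flow attribute
 * priority when the Verbs attribute is allocated (see
 * priv_flow_convert() below). More specific layers (L4) use 0, plain L3
 * uses 1 and Ethernet uses 2, so narrower matches take precedence over
 * broader ones at the same user priority.
 */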

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};
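
/*
 * Note: a single rte_flow may instantiate up to RTE_DIM(hash_rxq_init)
 * Verbs flows, one per hash Rx queue type (frxq[]), so that each
 * protocol combination selected by the RSS configuration gets its own
 * hash Rx queue with the matching hash fields.
 */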

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                                .time_to_live = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
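
/*
 * Example: the graph above accepts the pattern
 *   ETH / IPV4 / UDP / VXLAN / ETH / END
 * since each item lists the next one among its possible followers,
 * while ETH / TCP / END is rejected because TCP is not listed as a
 * follower of ETH.
 */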

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};
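
/*
 * Note: queue[i].offset is used in two passes. The validation pass
 * accumulates the total size needed for each Verbs attribute; once the
 * attribute is allocated, the same field is reset and reused as a
 * write cursor while the specifications are copied in.
 */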

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};
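
/*
 * Note: this rte_flow_ops table is what the generic rte_flow layer
 * invokes for this PMD; it is typically handed out through the device
 * filter_ctrl callback, and the callbacks it references are defined
 * further down this file.
 */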

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *last = item->last;

                for (i = 0; i < size; ++i)
                        if ((last[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                /* Validate the user mask itself against the supported mask. */
                const uint8_t *user = item->mask;

                for (i = 0; i < size; ++i)
                        if ((user[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
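
/*
 * Example: with a supported mask byte of 0x0f, a spec byte of 0x03
 * passes ((0x03 | 0x0f) == 0x0f) while 0x13 fails ((0x13 | 0x0f) ==
 * 0x1f != 0x0f), i.e. an item may only match on fields the NIC can
 * actually filter on.
 */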

/**
 * Copy the RSS configuration from the user into the parser; when
 * rss_conf is NULL, the driver default already present in the parser
 * is kept.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        /*
         * This function is also called at the beginning of
         * priv_flow_convert_actions() to initialize the parser with the
         * device default RSS configuration.
         */
        (void)priv;
        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                if (rss_conf->rss_key_len != 40)
                        return EINVAL;
                if (rss_conf->rss_key_len && rss_conf->rss_key) {
                        parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
                        memcpy(parser->rss_key, rss_conf->rss_key,
                               rss_conf->rss_key_len);
                        parser->rss_conf.rss_key = parser->rss_key;
                }
                parser->rss_conf.rss_hf = rss_conf->rss_hf;
        }
        return 0;
}
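
/*
 * Example (hypothetical caller values): a configuration of the shape
 * the checks above accept uses a 40-byte key and only hash fields
 * outside MLX5_RSS_HF_MASK:
 *
 *	static uint8_t key[40] = { 0 };
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = 40,
 *		.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP |
 *			  ETH_RSS_NONFRAG_IPV4_UDP,
 *	};
 *
 * Any rss_hf bit caught by MLX5_RSS_HF_MASK, or a key length other
 * than 40 bytes, makes the function return EINVAL.
 */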

/**
 * Extract the flow rule attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract the actions request into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add default RSS configuration necessary for Verbs to create QP even
         * if no RSS is necessary.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}
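
/*
 * Example: for the action list QUEUE(index=3) followed by RSS over
 * queues {3, 5}, the QUEUE branch above records queue 3, then the RSS
 * branch accepts it (queue 3 belongs to the RSS set) and widens
 * parser->queues[] to both queues. QUEUE(index=4) followed by RSS over
 * {3, 5} fails instead with "queue action not in RSS queues".
 */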

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after the verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}
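
/*
 * Note: at this point each queue[i].offset holds the total number of
 * bytes its Verbs attribute will need: sizeof(struct ibv_flow_attr)
 * plus the dst_sz of every matched item, plus the optional tag and
 * counter action specifications (and, for drop flows, the drop action
 * specification).
 */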

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * The Ethernet layer becomes useless once the pattern
                 * defines deeper layers.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                             (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}
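
/*
 * Example: for a pattern ETH / IPV4 / UDP with an RSS configuration
 * covering IPv4 and UDP, the Ethernet attribute and the whole IPv6
 * range are released; and since the UDPv4 hash type is enabled in
 * rss_hf, the remaining TCPv4/IPv4 attributes are dropped too, so the
 * flow ends up using the single matching UDPv4 hash Rx queue.
 */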

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
        if (!parser->drop) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
        if (parser->allmulti &&
            parser->layer == HASH_RXQ_ETH) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (parser->queue[i].ibv_attr->num_of_specs != 1)
                                break;
                        parser->queue[i].ibv_attr->type =
                                                IBV_FLOW_ATTR_MC_DEFAULT;
                }
        }
exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter.");
        return rte_errno;
}
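
/*
 * Note: priv_flow_convert() is the single entry point used both to
 * validate and to create a flow: step one validates attributes,
 * actions and items while sizing the Verbs attributes, step two
 * allocates them, step three converts each item into its specification
 * and the last step completes whatever the RSS configuration still
 * requires. With parser->create unset, everything allocated is
 * released before returning.
 */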

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                        parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
        parser->allmulti = eth.val.dst_mac[0] & 1;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset - eth_size);
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
                }
        }
        return 0;
}
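
/*
 * Note: unlike the other converters, VLAN has a dst_sz of 0 and emits
 * no specification of its own; it reaches back into the Ethernet
 * specification written just before it (offset - eth_size) and sets
 * the VLAN tag there.
 */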
1293
1294 /**
1295  * Convert IPv4 item to Verbs specification.
1296  *
1297  * @param item[in]
1298  *   Item specification.
1299  * @param default_mask[in]
1300  *   Default bit-masks to use when item->mask is not provided.
1301  * @param data[in, out]
1302  *   User structure.
1303  */
1304 static int
1305 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1306                       const void *default_mask,
1307                       void *data)
1308 {
1309         const struct rte_flow_item_ipv4 *spec = item->spec;
1310         const struct rte_flow_item_ipv4 *mask = item->mask;
1311         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1312         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1313         struct ibv_flow_spec_ipv4_ext ipv4 = {
1314                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1315                 .size = ipv4_size,
1316         };
1317
1318         /* Don't update layer for the inner pattern. */
1319         if (!parser->inner)
1320                 parser->layer = HASH_RXQ_IPV4;
1321         if (spec) {
1322                 if (!mask)
1323                         mask = default_mask;
1324                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1325                         .src_ip = spec->hdr.src_addr,
1326                         .dst_ip = spec->hdr.dst_addr,
1327                         .proto = spec->hdr.next_proto_id,
1328                         .tos = spec->hdr.type_of_service,
1329                 };
1330                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1331                         .src_ip = mask->hdr.src_addr,
1332                         .dst_ip = mask->hdr.dst_addr,
1333                         .proto = mask->hdr.next_proto_id,
1334                         .tos = mask->hdr.type_of_service,
1335                 };
1336                 /* Remove unwanted bits from values. */
1337                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1338                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1339                 ipv4.val.proto &= ipv4.mask.proto;
1340                 ipv4.val.tos &= ipv4.mask.tos;
1341         }
1342         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1343         return 0;
1344 }
1345
1346 /**
1347  * Convert IPv6 item to Verbs specification.
1348  *
1349  * @param[in] item
1350  *   Item specification.
1351  * @param[in] default_mask
1352  *   Default bit-masks to use when item->mask is not provided.
1353  * @param[in, out] data
1354  *   User structure.
1355  */
1356 static int
1357 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1358                       const void *default_mask,
1359                       void *data)
1360 {
1361         const struct rte_flow_item_ipv6 *spec = item->spec;
1362         const struct rte_flow_item_ipv6 *mask = item->mask;
1363         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1364         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1365         struct ibv_flow_spec_ipv6 ipv6 = {
1366                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1367                 .size = ipv6_size,
1368         };
1369
1370         /* Don't update layer for the inner pattern. */
1371         if (!parser->inner)
1372                 parser->layer = HASH_RXQ_IPV6;
1373         if (spec) {
1374                 unsigned int i;
1375                 uint32_t vtc_flow_val;
1376                 uint32_t vtc_flow_mask;
1377
1378                 if (!mask)
1379                         mask = default_mask;
1380                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1381                        RTE_DIM(ipv6.val.src_ip));
1382                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1383                        RTE_DIM(ipv6.val.dst_ip));
1384                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1385                        RTE_DIM(ipv6.mask.src_ip));
1386                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1387                        RTE_DIM(ipv6.mask.dst_ip));
1388                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1389                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1390                 ipv6.val.flow_label =
1391                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1392                                          IPV6_HDR_FL_SHIFT);
1393                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1394                                          IPV6_HDR_TC_SHIFT;
1395                 ipv6.val.next_hdr = spec->hdr.proto;
1396                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1397                 ipv6.mask.flow_label =
1398                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1399                                          IPV6_HDR_FL_SHIFT);
1400                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1401                                           IPV6_HDR_TC_SHIFT;
1402                 ipv6.mask.next_hdr = mask->hdr.proto;
1403                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1404                 /* Remove unwanted bits from values. */
1405                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1406                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1407                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1408                 }
1409                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1410                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1411                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1412                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1413         }
1414         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1415         return 0;
1416 }
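
/*
 * Reminder on the vtc_flow decomposition above: the field packs the IP
 * version (4 bits), traffic class (8 bits) and flow label (20 bits).
 * E.g. a (hypothetical) vtc_flow of 0x60a12345 yields traffic class
 * 0x0a and flow label 0x12345.
 */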
1417
1418 /**
1419  * Convert UDP item to Verbs specification.
1420  *
1421  * @param[in] item
1422  *   Item specification.
1423  * @param[in] default_mask
1424  *   Default bit-masks to use when item->mask is not provided.
1425  * @param[in, out] data
1426  *   User structure.
1427  */
1428 static int
1429 mlx5_flow_create_udp(const struct rte_flow_item *item,
1430                      const void *default_mask,
1431                      void *data)
1432 {
1433         const struct rte_flow_item_udp *spec = item->spec;
1434         const struct rte_flow_item_udp *mask = item->mask;
1435         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1436         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1437         struct ibv_flow_spec_tcp_udp udp = {
1438                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1439                 .size = udp_size,
1440         };
1441
1442         /* Don't update layer for the inner pattern. */
1443         if (!parser->inner) {
1444                 if (parser->layer == HASH_RXQ_IPV4)
1445                         parser->layer = HASH_RXQ_UDPV4;
1446                 else
1447                         parser->layer = HASH_RXQ_UDPV6;
1448         }
1449         if (spec) {
1450                 if (!mask)
1451                         mask = default_mask;
1452                 udp.val.dst_port = spec->hdr.dst_port;
1453                 udp.val.src_port = spec->hdr.src_port;
1454                 udp.mask.dst_port = mask->hdr.dst_port;
1455                 udp.mask.src_port = mask->hdr.src_port;
1456                 /* Remove unwanted bits from values. */
1457                 udp.val.src_port &= udp.mask.src_port;
1458                 udp.val.dst_port &= udp.mask.dst_port;
1459         }
1460         mlx5_flow_create_copy(parser, &udp, udp_size);
1461         return 0;
1462 }
1463
1464 /**
1465  * Convert TCP item to Verbs specification.
1466  *
1467  * @param[in] item
1468  *   Item specification.
1469  * @param[in] default_mask
1470  *   Default bit-masks to use when item->mask is not provided.
1471  * @param[in, out] data
1472  *   User structure.
1473  */
1474 static int
1475 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1476                      const void *default_mask,
1477                      void *data)
1478 {
1479         const struct rte_flow_item_tcp *spec = item->spec;
1480         const struct rte_flow_item_tcp *mask = item->mask;
1481         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1482         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1483         struct ibv_flow_spec_tcp_udp tcp = {
1484                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1485                 .size = tcp_size,
1486         };
1487
1488         /* Don't update layer for the inner pattern. */
1489         if (!parser->inner) {
1490                 if (parser->layer == HASH_RXQ_IPV4)
1491                         parser->layer = HASH_RXQ_TCPV4;
1492                 else
1493                         parser->layer = HASH_RXQ_TCPV6;
1494         }
1495         if (spec) {
1496                 if (!mask)
1497                         mask = default_mask;
1498                 tcp.val.dst_port = spec->hdr.dst_port;
1499                 tcp.val.src_port = spec->hdr.src_port;
1500                 tcp.mask.dst_port = mask->hdr.dst_port;
1501                 tcp.mask.src_port = mask->hdr.src_port;
1502                 /* Remove unwanted bits from values. */
1503                 tcp.val.src_port &= tcp.mask.src_port;
1504                 tcp.val.dst_port &= tcp.mask.dst_port;
1505         }
1506         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1507         return 0;
1508 }
1509
1510 /**
1511  * Convert VXLAN item to Verbs specification.
1512  *
1513  * @param[in] item
1514  *   Item specification.
1515  * @param[in] default_mask
1516  *   Default bit-masks to use when item->mask is not provided.
1517  * @param[in, out] data
1518  *   User structure.
1519  */
1520 static int
1521 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1522                        const void *default_mask,
1523                        void *data)
1524 {
1525         const struct rte_flow_item_vxlan *spec = item->spec;
1526         const struct rte_flow_item_vxlan *mask = item->mask;
1527         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1528         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1529         struct ibv_flow_spec_tunnel vxlan = {
1530                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1531                 .size = size,
1532         };
1533         union vni {
1534                 uint32_t vlan_id;
1535                 uint8_t vni[4];
1536         } id;
1537
1538         id.vni[0] = 0;
1539         parser->inner = IBV_FLOW_SPEC_INNER;
1540         if (spec) {
1541                 if (!mask)
1542                         mask = default_mask;
1543                 memcpy(&id.vni[1], spec->vni, 3);
1544                 vxlan.val.tunnel_id = id.vlan_id;
1545                 memcpy(&id.vni[1], mask->vni, 3);
1546                 vxlan.mask.tunnel_id = id.vlan_id;
1547                 /* Remove unwanted bits from values. */
1548                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1549         }
1550         /*
1551          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if it is
1552          * the only layer defined in the Verbs specification, it is
1553          * interpreted as a wildcard and all packets match the rule; if it
1554          * follows a full stack (e.g. eth / ipv4 / udp), all packets
1555          * matching the preceding layers also match the rule.
1556          * To avoid such situations, VNI 0 is currently refused.
1557          */
1558         if (!vxlan.val.tunnel_id)
1559                 return EINVAL;
1560         mlx5_flow_create_copy(parser, &vxlan, size);
1561         return 0;
1562 }
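
/*
 * Example (hypothetical port/queue numbers): a testpmd rule exercising
 * this converter:
 *
 *   flow create 0 ingress
 *        pattern eth / ipv4 / udp / vxlan vni is 42 / end
 *        actions queue index 3 / end
 *
 * The same rule with "vni is 0" is rejected with EINVAL as explained
 * above.
 */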
1563
1564 /**
1565  * Convert mark/flag action to Verbs specification.
1566  *
1567  * @param parser
1568  *   Internal parser structure.
1569  * @param mark_id
1570  *   Mark identifier.
1571  */
1572 static int
1573 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1574 {
1575         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1576         struct ibv_flow_spec_action_tag tag = {
1577                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1578                 .size = size,
1579                 .tag_id = mlx5_flow_mark_set(mark_id),
1580         };
1581
1582         assert(parser->mark);
1583         mlx5_flow_create_copy(parser, &tag, size);
1584         return 0;
1585 }
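
/*
 * On the Rx side, packets matching a rule with a MARK/FLAG action
 * report the tag through the mbuf (PKT_RX_FDIR/PKT_RX_FDIR_ID in
 * ol_flags, value in mbuf->hash.fdir.hi), hence the encoding performed
 * by mlx5_flow_mark_set().
 */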
1586
1587 /**
1588  * Convert count action to Verbs specification.
1589  *
1590  * @param priv
1591  *   Pointer to private structure.
1592  * @param parser
1593  *   Pointer to MLX5 flow parser structure.
1594  *
1595  * @return
1596  *   0 on success, errno value on failure.
1597  */
1598 static int
1599 mlx5_flow_create_count(struct priv *priv __rte_unused,
1600                        struct mlx5_flow_parse *parser __rte_unused)
1601 {
1602 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1603         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1604         struct ibv_counter_set_init_attr init_attr = {0};
1605         struct ibv_flow_spec_counter_action counter = {
1606                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1607                 .size = size,
1608                 .counter_set_handle = 0,
1609         };
1610
1611         init_attr.counter_set_id = 0;
1612         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1613         if (!parser->cs)
1614                 return EINVAL;
1615         counter.counter_set_handle = parser->cs->handle;
1616         mlx5_flow_create_copy(parser, &counter, size);
1617 #endif
1618         return 0;
1619 }
1620
1621 /**
1622  * Complete flow rule creation with a drop queue.
1623  *
1624  * @param priv
1625  *   Pointer to private structure.
1626  * @param parser
1627  *   Internal parser structure.
1628  * @param flow
1629  *   Pointer to the rte_flow.
1630  * @param[out] error
1631  *   Perform verbose error reporting if not NULL.
1632  *
1633  * @return
1634  *   0 on success, errno value on failure.
1635  */
1636 static int
1637 priv_flow_create_action_queue_drop(struct priv *priv,
1638                                    struct mlx5_flow_parse *parser,
1639                                    struct rte_flow *flow,
1640                                    struct rte_flow_error *error)
1641 {
1642         struct ibv_flow_spec_action_drop *drop;
1643         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1644         int err = 0;
1645
1646         assert(priv->pd);
1647         assert(priv->ctx);
1648         flow->drop = 1;
1649         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1650                         parser->queue[HASH_RXQ_ETH].offset);
1651         *drop = (struct ibv_flow_spec_action_drop){
1652                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1653                         .size = size,
1654         };
1655         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1656         parser->queue[HASH_RXQ_ETH].offset += size;
1657         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1658                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1659         if (parser->count)
1660                 flow->cs = parser->cs;
1661         if (!priv->dev->data->dev_started)
1662                 return 0;
1663         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1664         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1665                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1666                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1667         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1668                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1669                                    NULL, "flow rule creation failure");
1670                 err = ENOMEM;
1671                 goto error;
1672         }
1673         return 0;
1674 error:
1675         assert(flow);
1676         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1677                 claim_zero(mlx5_glue->destroy_flow
1678                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1679                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1680         }
1681         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1682                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1683                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1684         }
1685         if (flow->cs) {
1686                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1687                 flow->cs = NULL;
1688                 parser->cs = NULL;
1689         }
1690         return err;
1691 }
1692
1693 /**
1694  * Create hash Rx queues when RSS is enabled.
1695  *
1696  * @param priv
1697  *   Pointer to private structure.
1698  * @param parser
1699  *   Internal parser structure.
1700  * @param flow
1701  *   Pointer to the rte_flow.
1702  * @param[out] error
1703  *   Perform verbose error reporting if not NULL.
1704  *
1705  * @return
1706  *   0 on success, an errno value otherwise and rte_errno is set.
1707  */
1708 static int
1709 priv_flow_create_action_queue_rss(struct priv *priv,
1710                                   struct mlx5_flow_parse *parser,
1711                                   struct rte_flow *flow,
1712                                   struct rte_flow_error *error)
1713 {
1714         unsigned int i;
1715
1716         for (i = 0; i != hash_rxq_init_n; ++i) {
1717                 uint64_t hash_fields;
1718
1719                 if (!parser->queue[i].ibv_attr)
1720                         continue;
1721                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1722                 parser->queue[i].ibv_attr = NULL;
1723                 hash_fields = hash_rxq_init[i].hash_fields;
1724                 if (!priv->dev->data->dev_started)
1725                         continue;
1726                 flow->frxq[i].hrxq =
1727                         mlx5_priv_hrxq_get(priv,
1728                                            parser->rss_conf.rss_key,
1729                                            parser->rss_conf.rss_key_len,
1730                                            hash_fields,
1731                                            parser->queues,
1732                                            parser->queues_n);
1733                 if (flow->frxq[i].hrxq)
1734                         continue;
1735                 flow->frxq[i].hrxq =
1736                         mlx5_priv_hrxq_new(priv,
1737                                            parser->rss_conf.rss_key,
1738                                            parser->rss_conf.rss_key_len,
1739                                            hash_fields,
1740                                            parser->queues,
1741                                            parser->queues_n);
1742                 if (!flow->frxq[i].hrxq) {
1743                         rte_flow_error_set(error, ENOMEM,
1744                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1745                                            NULL, "cannot create hash rxq");
1746                         return ENOMEM;
1747                 }
1748         }
1749         return 0;
1750 }
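
/*
 * The get-or-create pattern above lets flows sharing the same RSS key,
 * hash fields and queue set also share a single hash Rx queue:
 * mlx5_priv_hrxq_get() takes a reference on an existing hrxq and
 * mlx5_priv_hrxq_new() allocates one only on a miss.
 */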
1751
1752 /**
1753  * Complete flow rule creation.
1754  *
1755  * @param priv
1756  *   Pointer to private structure.
1757  * @param parser
1758  *   Internal parser structure.
1759  * @param flow
1760  *   Pointer to the rte_flow.
1761  * @param[out] error
1762  *   Perform verbose error reporting if not NULL.
1763  *
1764  * @return
1765  *   0 on success, an errno value otherwise and rte_errno is set.
1766  */
1767 static int
1768 priv_flow_create_action_queue(struct priv *priv,
1769                               struct mlx5_flow_parse *parser,
1770                               struct rte_flow *flow,
1771                               struct rte_flow_error *error)
1772 {
1773         int err = 0;
1774         unsigned int i;
1775
1776         assert(priv->pd);
1777         assert(priv->ctx);
1778         assert(!parser->drop);
1779         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1780         if (err)
1781                 goto error;
1782         if (parser->count)
1783                 flow->cs = parser->cs;
1784         if (!priv->dev->data->dev_started)
1785                 return 0;
1786         for (i = 0; i != hash_rxq_init_n; ++i) {
1787                 if (!flow->frxq[i].hrxq)
1788                         continue;
1789                 flow->frxq[i].ibv_flow =
1790                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1791                                                flow->frxq[i].ibv_attr);
1792                 if (!flow->frxq[i].ibv_flow) {
1793                         rte_flow_error_set(error, ENOMEM,
1794                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1795                                            NULL, "flow rule creation failure");
1796                         err = ENOMEM;
1797                         goto error;
1798                 }
1799                 DEBUG("%p type %d QP %p ibv_flow %p",
1800                       (void *)flow, i,
1801                       (void *)flow->frxq[i].hrxq,
1802                       (void *)flow->frxq[i].ibv_flow);
1803         }
1804         for (i = 0; i != parser->queues_n; ++i) {
1805                 struct mlx5_rxq_data *q =
1806                         (*priv->rxqs)[parser->queues[i]];
1807
1808                 q->mark |= parser->mark;
1809         }
1810         return 0;
1811 error:
1812         assert(flow);
1813         for (i = 0; i != hash_rxq_init_n; ++i) {
1814                 if (flow->frxq[i].ibv_flow) {
1815                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1816
1817                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1818                 }
1819                 if (flow->frxq[i].hrxq)
1820                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1821                 if (flow->frxq[i].ibv_attr)
1822                         rte_free(flow->frxq[i].ibv_attr);
1823         }
1824         if (flow->cs) {
1825                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1826                 flow->cs = NULL;
1827                 parser->cs = NULL;
1828         }
1829         return err;
1830 }
1831
1832 /**
1833  * Convert a flow.
1834  *
1835  * @param priv
1836  *   Pointer to private structure.
1837  * @param list
1838  *   Pointer to a TAILQ flow list.
1839  * @param[in] attr
1840  *   Flow rule attributes.
1841  * @param[in] pattern
1842  *   Pattern specification (list terminated by the END pattern item).
1843  * @param[in] actions
1844  *   Associated actions (list terminated by the END action).
1845  * @param[out] error
1846  *   Perform verbose error reporting if not NULL.
1847  *
1848  * @return
1849  *   A flow on success, NULL otherwise.
1850  */
1851 static struct rte_flow *
1852 priv_flow_create(struct priv *priv,
1853                  struct mlx5_flows *list,
1854                  const struct rte_flow_attr *attr,
1855                  const struct rte_flow_item items[],
1856                  const struct rte_flow_action actions[],
1857                  struct rte_flow_error *error)
1858 {
1859         struct mlx5_flow_parse parser = { .create = 1, };
1860         struct rte_flow *flow = NULL;
1861         unsigned int i;
1862         int err;
1863
1864         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1865         if (err)
1866                 goto exit;
1867         flow = rte_calloc(__func__, 1,
1868                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1869                           0);
1870         if (!flow) {
1871                 rte_flow_error_set(error, ENOMEM,
1872                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1873                                    NULL,
1874                                    "cannot allocate flow memory");
1875                 return NULL;
1876         }
1877         /* Copy queues configuration. */
1878         flow->queues = (uint16_t (*)[])(flow + 1);
1879         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1880         flow->queues_n = parser.queues_n;
1881         flow->mark = parser.mark;
1882         /* Copy RSS configuration. */
1883         flow->rss_conf = parser.rss_conf;
1884         flow->rss_conf.rss_key = flow->rss_key;
1885         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1886         /* Finalize the flow. */
1887         if (parser.drop)
1888                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1889                                                          error);
1890         else
1891                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1892         if (err)
1893                 goto exit;
1894         TAILQ_INSERT_TAIL(list, flow, next);
1895         DEBUG("Flow created %p", (void *)flow);
1896         return flow;
1897 exit:
1898         for (i = 0; i != hash_rxq_init_n; ++i) {
1899                 if (parser.queue[i].ibv_attr)
1900                         rte_free(parser.queue[i].ibv_attr);
1901         }
1902         rte_free(flow);
1903         return NULL;
1904 }
1905
1906 /**
1907  * Validate a flow supported by the NIC.
1908  *
1909  * @see rte_flow_validate()
1910  * @see rte_flow_ops
1911  */
1912 int
1913 mlx5_flow_validate(struct rte_eth_dev *dev,
1914                    const struct rte_flow_attr *attr,
1915                    const struct rte_flow_item items[],
1916                    const struct rte_flow_action actions[],
1917                    struct rte_flow_error *error)
1918 {
1919         struct priv *priv = dev->data->dev_private;
1920         int ret;
1921         struct mlx5_flow_parse parser = { .create = 0, };
1922
1923         priv_lock(priv);
1924         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1925         priv_unlock(priv);
1926         return ret;
1927 }
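
/*
 * Applications reach the handler above through the generic rte_flow
 * API. A minimal sketch, assuming attr/pattern/actions describe a
 * valid rule and port_id is bound to this PMD:
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *flow = NULL;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           printf("rule rejected: %s\n",
 *                  err.message ? err.message : "(no message)");
 *   else
 *           flow = rte_flow_create(port_id, &attr, pattern, actions,
 *                                  &err);
 */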
1928
1929 /**
1930  * Create a flow.
1931  *
1932  * @see rte_flow_create()
1933  * @see rte_flow_ops
1934  */
1935 struct rte_flow *
1936 mlx5_flow_create(struct rte_eth_dev *dev,
1937                  const struct rte_flow_attr *attr,
1938                  const struct rte_flow_item items[],
1939                  const struct rte_flow_action actions[],
1940                  struct rte_flow_error *error)
1941 {
1942         struct priv *priv = dev->data->dev_private;
1943         struct rte_flow *flow;
1944
1945         priv_lock(priv);
1946         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1947                                 error);
1948         priv_unlock(priv);
1949         return flow;
1950 }
1951
1952 /**
1953  * Destroy a flow.
1954  *
1955  * @param priv
1956  *   Pointer to private structure.
1957  * @param list
1958  *   Pointer to a TAILQ flow list.
1959  * @param[in] flow
1960  *   Flow to destroy.
1961  */
1962 static void
1963 priv_flow_destroy(struct priv *priv,
1964                   struct mlx5_flows *list,
1965                   struct rte_flow *flow)
1966 {
1967         unsigned int i;
1968
1969         if (flow->drop || !flow->mark)
1970                 goto free;
1971         for (i = 0; i != flow->queues_n; ++i) {
1972                 struct rte_flow *tmp;
1973                 int mark = 0;
1974
1975                 /*
1976                  * To remove the mark from the queue, the queue must not be
1977                  * present in any other marked flow (RSS or not).
1978                  */
1979                 TAILQ_FOREACH(tmp, list, next) {
1980                         unsigned int j;
1981                         uint16_t *tqs = NULL;
1982                         uint16_t tq_n = 0;
1983
1984                         if (!tmp->mark)
1985                                 continue;
1986                         for (j = 0; j != hash_rxq_init_n; ++j) {
1987                                 if (!tmp->frxq[j].hrxq)
1988                                         continue;
1989                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1990                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1991                         }
1992                         if (!tq_n)
1993                                 continue;
1994                         for (j = 0; (j != tq_n) && !mark; j++)
1995                                 if (tqs[j] == (*flow->queues)[i])
1996                                         mark = 1;
1997                 }
1998                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1999         }
2000 free:
2001         if (flow->drop) {
2002                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2003                         claim_zero(mlx5_glue->destroy_flow
2004                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2005                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2006         } else {
2007                 for (i = 0; i != hash_rxq_init_n; ++i) {
2008                         struct mlx5_flow *frxq = &flow->frxq[i];
2009
2010                         if (frxq->ibv_flow)
2011                                 claim_zero(mlx5_glue->destroy_flow
2012                                            (frxq->ibv_flow));
2013                         if (frxq->hrxq)
2014                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2015                         if (frxq->ibv_attr)
2016                                 rte_free(frxq->ibv_attr);
2017                 }
2018         }
2019         if (flow->cs) {
2020                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2021                 flow->cs = NULL;
2022         }
2023         TAILQ_REMOVE(list, flow, next);
2024         DEBUG("Flow destroyed %p", (void *)flow);
2025         rte_free(flow);
2026 }
2027
2028 /**
2029  * Destroy all flows.
2030  *
2031  * @param priv
2032  *   Pointer to private structure.
2033  * @param list
2034  *   Pointer to a TAILQ flow list.
2035  */
2036 void
2037 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2038 {
2039         while (!TAILQ_EMPTY(list)) {
2040                 struct rte_flow *flow;
2041
2042                 flow = TAILQ_FIRST(list);
2043                 priv_flow_destroy(priv, list, flow);
2044         }
2045 }
2046
2047 /**
2048  * Create drop queue.
2049  *
2050  * @param priv
2051  *   Pointer to private structure.
2052  *
2053  * @return
2054  *   0 on success, -1 otherwise.
2055  */
2056 int
2057 priv_flow_create_drop_queue(struct priv *priv)
2058 {
2059         struct mlx5_hrxq_drop *fdq = NULL;
2060
2061         assert(priv->pd);
2062         assert(priv->ctx);
2063         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2064         if (!fdq) {
2065                 WARN("cannot allocate memory for drop queue");
2066                 goto error;
2067         }
2068         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2069         if (!fdq->cq) {
2070                 WARN("cannot allocate CQ for drop queue");
2071                 goto error;
2072         }
2073         fdq->wq = mlx5_glue->create_wq
2074                 (priv->ctx,
2075                  &(struct ibv_wq_init_attr){
2076                         .wq_type = IBV_WQT_RQ,
2077                         .max_wr = 1,
2078                         .max_sge = 1,
2079                         .pd = priv->pd,
2080                         .cq = fdq->cq,
2081                  });
2082         if (!fdq->wq) {
2083                 WARN("cannot allocate WQ for drop queue");
2084                 goto error;
2085         }
2086         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2087                 (priv->ctx,
2088                  &(struct ibv_rwq_ind_table_init_attr){
2089                         .log_ind_tbl_size = 0,
2090                         .ind_tbl = &fdq->wq,
2091                         .comp_mask = 0,
2092                  });
2093         if (!fdq->ind_table) {
2094                 WARN("cannot allocate indirection table for drop queue");
2095                 goto error;
2096         }
2097         fdq->qp = mlx5_glue->create_qp_ex
2098                 (priv->ctx,
2099                  &(struct ibv_qp_init_attr_ex){
2100                         .qp_type = IBV_QPT_RAW_PACKET,
2101                         .comp_mask =
2102                                 IBV_QP_INIT_ATTR_PD |
2103                                 IBV_QP_INIT_ATTR_IND_TABLE |
2104                                 IBV_QP_INIT_ATTR_RX_HASH,
2105                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2106                                 .rx_hash_function =
2107                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2108                                 .rx_hash_key_len = rss_hash_default_key_len,
2109                                 .rx_hash_key = rss_hash_default_key,
2110                                 .rx_hash_fields_mask = 0,
2111                                 },
2112                         .rwq_ind_tbl = fdq->ind_table,
2113                         .pd = priv->pd
2114                  });
2115         if (!fdq->qp) {
2116                 WARN("cannot allocate QP for drop queue");
2117                 goto error;
2118         }
2119         priv->flow_drop_queue = fdq;
2120         return 0;
2121 error:
2122         if (fdq && fdq->qp)
2123                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2124         if (fdq && fdq->ind_table)
2125                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2126         if (fdq && fdq->wq)
2127                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2128         if (fdq && fdq->cq)
2129                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2130         if (fdq)
2131                 rte_free(fdq);
2132         priv->flow_drop_queue = NULL;
2133         return -1;
2134 }
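
/*
 * Rationale: even a rule carrying IBV_FLOW_SPEC_ACTION_DROP must be
 * attached to a QP. The drop queue built above is a minimal Rx object
 * chain (CQ -> WQ -> indirection table -> QP) that is never polled and
 * only exists to host such rules.
 */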
2135
2136 /**
2137  * Delete drop queue.
2138  *
2139  * @param priv
2140  *   Pointer to private structure.
2141  */
2142 void
2143 priv_flow_delete_drop_queue(struct priv *priv)
2144 {
2145         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2146
2147         if (!fdq)
2148                 return;
2149         if (fdq->qp)
2150                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2151         if (fdq->ind_table)
2152                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2153         if (fdq->wq)
2154                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2155         if (fdq->cq)
2156                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2157         rte_free(fdq);
2158         priv->flow_drop_queue = NULL;
2159 }
2160
2161 /**
2162  * Remove all flows from the NIC; the rte_flow objects are kept.
2163  *
2164  * @param priv
2165  *   Pointer to private structure.
2166  * @param list
2167  *   Pointer to a TAILQ flow list.
2168  */
2169 void
2170 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2171 {
2172         struct rte_flow *flow;
2173
2174         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2175                 unsigned int i;
2176                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2177
2178                 if (flow->drop) {
2179                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2180                                 continue;
2181                         claim_zero(mlx5_glue->destroy_flow
2182                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2183                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2184                         DEBUG("Flow %p removed", (void *)flow);
2185                         /* Next flow. */
2186                         continue;
2187                 }
2188                 /* Verify the flow has not already been cleaned. */
2189                 for (i = 0; i != hash_rxq_init_n; ++i) {
2190                         if (!flow->frxq[i].ibv_flow)
2191                                 continue;
2192                         /*
2193                          * Indirection table may be necessary to remove the
2194                          * flags in the Rx queues.
2195                          * This helps to speed-up the process by avoiding
2196                          * another loop.
2197                          */
2198                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2199                         break;
2200                 }
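                /*
                 * Flows are released in reverse creation order; a flow
                 * with no remaining Verbs flow is assumed to mean the
                 * rest of the list has already been cleaned, so stop.
                 */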
2201                 if (i == hash_rxq_init_n)
2202                         return;
2203                 if (flow->mark) {
2204                         assert(ind_tbl);
2205                         for (i = 0; i != ind_tbl->queues_n; ++i)
2206                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2207                 }
2208                 for (i = 0; i != hash_rxq_init_n; ++i) {
2209                         if (!flow->frxq[i].ibv_flow)
2210                                 continue;
2211                         claim_zero(mlx5_glue->destroy_flow
2212                                    (flow->frxq[i].ibv_flow));
2213                         flow->frxq[i].ibv_flow = NULL;
2214                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2215                         flow->frxq[i].hrxq = NULL;
2216                 }
2217                 DEBUG("Flow %p removed", (void *)flow);
2218         }
2219 }
2220
2221 /**
2222  * Add (re-apply) all flows to the NIC.
2223  *
2224  * @param priv
2225  *   Pointer to private structure.
2226  * @param list
2227  *   Pointer to a TAILQ flow list.
2228  *
2229  * @return
2230  *   0 on success, an errno value otherwise and rte_errno is set.
2231  */
2232 int
2233 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2234 {
2235         struct rte_flow *flow;
2236
2237         TAILQ_FOREACH(flow, list, next) {
2238                 unsigned int i;
2239
2240                 if (flow->drop) {
2241                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2242                                 mlx5_glue->create_flow
2243                                 (priv->flow_drop_queue->qp,
2244                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2245                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2246                                 DEBUG("Flow %p cannot be applied",
2247                                       (void *)flow);
2248                                 rte_errno = EINVAL;
2249                                 return rte_errno;
2250                         }
2251                         DEBUG("Flow %p applied", (void *)flow);
2252                         /* Next flow. */
2253                         continue;
2254                 }
2255                 for (i = 0; i != hash_rxq_init_n; ++i) {
2256                         if (!flow->frxq[i].ibv_attr)
2257                                 continue;
2258                         flow->frxq[i].hrxq =
2259                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2260                                                    flow->rss_conf.rss_key_len,
2261                                                    hash_rxq_init[i].hash_fields,
2262                                                    (*flow->queues),
2263                                                    flow->queues_n);
2264                         if (flow->frxq[i].hrxq)
2265                                 goto flow_create;
2266                         flow->frxq[i].hrxq =
2267                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2268                                                    flow->rss_conf.rss_key_len,
2269                                                    hash_rxq_init[i].hash_fields,
2270                                                    (*flow->queues),
2271                                                    flow->queues_n);
2272                         if (!flow->frxq[i].hrxq) {
2273                                 DEBUG("Flow %p cannot be applied",
2274                                       (void *)flow);
2275                                 rte_errno = EINVAL;
2276                                 return rte_errno;
2277                         }
2278 flow_create:
2279                         flow->frxq[i].ibv_flow =
2280                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2281                                                        flow->frxq[i].ibv_attr);
2282                         if (!flow->frxq[i].ibv_flow) {
2283                                 DEBUG("Flow %p cannot be applied",
2284                                       (void *)flow);
2285                                 rte_errno = EINVAL;
2286                                 return rte_errno;
2287                         }
2288                         DEBUG("Flow %p applied", (void *)flow);
2289                 }
2290                 if (!flow->mark)
2291                         continue;
2292                 for (i = 0; i != flow->queues_n; ++i)
2293                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2294         }
2295         return 0;
2296 }
2297
2298 /**
2299  * Verify the flow list is empty.
2300  *
2301  * @param priv
2302  *   Pointer to private structure.
2303  *
2304  * @return The number of flows not released.
2305  */
2306 int
2307 priv_flow_verify(struct priv *priv)
2308 {
2309         struct rte_flow *flow;
2310         int ret = 0;
2311
2312         TAILQ_FOREACH(flow, &priv->flows, next) {
2313                 DEBUG("%p: flow %p still referenced", (void *)priv,
2314                       (void *)flow);
2315                 ++ret;
2316         }
2317         return ret;
2318 }
2319
2320 /**
2321  * Enable a control flow configured from the control plane.
2322  *
2323  * @param dev
2324  *   Pointer to Ethernet device.
2325  * @param eth_spec
2326  *   An Ethernet flow spec to apply.
2327  * @param eth_mask
2328  *   An Ethernet flow mask to apply.
2329  * @param vlan_spec
2330  *   A VLAN flow spec to apply.
2331  * @param vlan_mask
2332  *   A VLAN flow mask to apply.
2333  *
2334  * @return
2335  *   0 on success, an errno value otherwise.
2336  */
2337 int
2338 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2339                     struct rte_flow_item_eth *eth_spec,
2340                     struct rte_flow_item_eth *eth_mask,
2341                     struct rte_flow_item_vlan *vlan_spec,
2342                     struct rte_flow_item_vlan *vlan_mask)
2343 {
2344         struct priv *priv = dev->data->dev_private;
2345         const struct rte_flow_attr attr = {
2346                 .ingress = 1,
2347                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2348         };
2349         struct rte_flow_item items[] = {
2350                 {
2351                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2352                         .spec = eth_spec,
2353                         .last = NULL,
2354                         .mask = eth_mask,
2355                 },
2356                 {
2357                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2358                                 RTE_FLOW_ITEM_TYPE_END,
2359                         .spec = vlan_spec,
2360                         .last = NULL,
2361                         .mask = vlan_mask,
2362                 },
2363                 {
2364                         .type = RTE_FLOW_ITEM_TYPE_END,
2365                 },
2366         };
2367         struct rte_flow_action actions[] = {
2368                 {
2369                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2370                 },
2371                 {
2372                         .type = RTE_FLOW_ACTION_TYPE_END,
2373                 },
2374         };
2375         struct rte_flow *flow;
2376         struct rte_flow_error error;
2377         unsigned int i;
2378         union {
2379                 struct rte_flow_action_rss rss;
2380                 struct {
2381                         const struct rte_eth_rss_conf *rss_conf;
2382                         uint16_t num;
2383                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2384                 } local;
2385         } action_rss;
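        /*
         * The "local" member above mirrors the memory layout of struct
         * rte_flow_action_rss (rss_conf pointer, queue count, flexible
         * queue[] array) so the union can be filled directly and handed
         * to the RSS action below.
         */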
2386
2387         if (!priv->reta_idx_n)
2388                 return EINVAL;
2389         for (i = 0; i != priv->reta_idx_n; ++i)
2390                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2391         action_rss.local.rss_conf = &priv->rss_conf;
2392         action_rss.local.num = priv->reta_idx_n;
2393         actions[0].conf = (const void *)&action_rss.rss;
2394         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2395                                 &error);
2396         if (!flow)
2397                 return rte_errno;
2398         return 0;
2399 }
2400
2401 /**
2402  * Enable a control flow configured from the control plane.
2403  *
2404  * @param dev
2405  *   Pointer to Ethernet device.
2406  * @param eth_spec
2407  *   An Ethernet flow spec to apply.
2408  * @param eth_mask
2409  *   An Ethernet flow mask to apply.
2410  *
2411  * @return
2412  *   0 on success, an errno value otherwise.
2413  */
2414 int
2415 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2416                struct rte_flow_item_eth *eth_spec,
2417                struct rte_flow_item_eth *eth_mask)
2418 {
2419         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2420 }
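
/*
 * Usage sketch (spec/mask values are illustrative): the PMD enables
 * control flows like broadcast at start time with something akin to:
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *           ERROR("cannot enable broadcast control flow");
 */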
2421
2422 /**
2423  * Destroy a flow.
2424  *
2425  * @see rte_flow_destroy()
2426  * @see rte_flow_ops
2427  */
2428 int
2429 mlx5_flow_destroy(struct rte_eth_dev *dev,
2430                   struct rte_flow *flow,
2431                   struct rte_flow_error *error)
2432 {
2433         struct priv *priv = dev->data->dev_private;
2434
2435         (void)error;
2436         priv_lock(priv);
2437         priv_flow_destroy(priv, &priv->flows, flow);
2438         priv_unlock(priv);
2439         return 0;
2440 }
2441
2442 /**
2443  * Destroy all flows.
2444  *
2445  * @see rte_flow_flush()
2446  * @see rte_flow_ops
2447  */
2448 int
2449 mlx5_flow_flush(struct rte_eth_dev *dev,
2450                 struct rte_flow_error *error)
2451 {
2452         struct priv *priv = dev->data->dev_private;
2453
2454         (void)error;
2455         priv_lock(priv);
2456         priv_flow_flush(priv, &priv->flows);
2457         priv_unlock(priv);
2458         return 0;
2459 }
2460
2461 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2462 /**
2463  * Query flow counter.
2464  *
2465  * @param cs
2466  *   The counter set.
2467  * @param counter_stats
2468  *   Stored statistics used to compute the deltas returned in query_count.
2469  *
2470  * @return
2471  *   0 on success, an errno value otherwise and rte_errno is set.
2472  */
2473 static int
2474 priv_flow_query_count(struct ibv_counter_set *cs,
2475                       struct mlx5_flow_counter_stats *counter_stats,
2476                       struct rte_flow_query_count *query_count,
2477                       struct rte_flow_error *error)
2478 {
2479         uint64_t counters[2];
2480         struct ibv_query_counter_set_attr query_cs_attr = {
2481                 .cs = cs,
2482                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2483         };
2484         struct ibv_counter_set_data query_out = {
2485                 .out = counters,
2486                 .outlen = 2 * sizeof(uint64_t),
2487         };
2488         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2489
2490         if (res) {
2491                 rte_flow_error_set(error, -res,
2492                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2493                                    NULL,
2494                                    "cannot read counter");
2495                 return -res;
2496         }
2497         query_count->hits_set = 1;
2498         query_count->bytes_set = 1;
2499         query_count->hits = counters[0] - counter_stats->hits;
2500         query_count->bytes = counters[1] - counter_stats->bytes;
2501         if (query_count->reset) {
2502                 counter_stats->hits = counters[0];
2503                 counter_stats->bytes = counters[1];
2504         }
2505         return 0;
2506 }
2507
2508 /**
2509  * Query a flow.
2510  *
2511  * @see rte_flow_query()
2512  * @see rte_flow_ops
2513  */
2514 int
2515 mlx5_flow_query(struct rte_eth_dev *dev,
2516                 struct rte_flow *flow,
2517                 enum rte_flow_action_type action __rte_unused,
2518                 void *data,
2519                 struct rte_flow_error *error)
2520 {
2521         struct priv *priv = dev->data->dev_private;
2522         int res = EINVAL;
2523
2524         priv_lock(priv);
2525         if (flow->cs) {
2526                 res = priv_flow_query_count(flow->cs,
2527                                         &flow->counter_stats,
2528                                         (struct rte_flow_query_count *)data,
2529                                         error);
2530         } else {
2531                 rte_flow_error_set(error, res,
2532                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2533                                    NULL,
2534                                    "no counter found for flow");
2535         }
2536         priv_unlock(priv);
2537         return -res;
2538 }
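
/*
 * Query sketch for a rule created with a COUNT action (port_id and
 * flow are assumed to come from the caller):
 *
 *   struct rte_flow_query_count qc = { .reset = 0 };
 *   struct rte_flow_error err;
 *
 *   if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                       &qc, &err) && qc.hits_set)
 *           printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
 *                  qc.hits, qc.bytes);
 */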
2539 #endif
2540
2541 /**
2542  * Isolated mode.
2543  *
2544  * @see rte_flow_isolate()
2545  * @see rte_flow_ops
2546  */
2547 int
2548 mlx5_flow_isolate(struct rte_eth_dev *dev,
2549                   int enable,
2550                   struct rte_flow_error *error)
2551 {
2552         struct priv *priv = dev->data->dev_private;
2553
2554         priv_lock(priv);
2555         if (dev->data->dev_started) {
2556                 rte_flow_error_set(error, EBUSY,
2557                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2558                                    NULL,
2559                                    "port must be stopped first");
2560                 priv_unlock(priv);
2561                 return -rte_errno;
2562         }
2563         priv->isolated = !!enable;
2564         if (enable)
2565                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2566         else
2567                 priv->dev->dev_ops = &mlx5_dev_ops;
2568         priv_unlock(priv);
2569         return 0;
2570 }
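
/*
 * Isolated mode must be selected while the port is stopped, e.g.
 * (port_id is hypothetical):
 *
 *   rte_eth_dev_stop(port_id);
 *   if (rte_flow_isolate(port_id, 1, &err))
 *           printf("cannot enter isolated mode: %s\n", err.message);
 *   rte_eth_dev_start(port_id);
 *
 * While isolated, the PMD stops installing implicit control flows and
 * only traffic matching explicitly created rules is received.
 */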
2571
2572 /**
2573  * Convert a flow director filter to a generic flow.
2574  *
2575  * @param priv
2576  *   Private structure.
2577  * @param fdir_filter
2578  *   Flow director filter to add.
2579  * @param attributes
2580  *   Generic flow parameters structure.
2581  *
2582  * @return
2583  *   0 on success, errno value on error.
2584  */
2585 static int
2586 priv_fdir_filter_convert(struct priv *priv,
2587                          const struct rte_eth_fdir_filter *fdir_filter,
2588                          struct mlx5_fdir *attributes)
2589 {
2590         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2591
2592         /* Validate queue number. */
2593         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2594                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2595                 return EINVAL;
2596         }
2597         attributes->attr.ingress = 1;
2598         attributes->items[0] = (struct rte_flow_item) {
2599                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2600                 .spec = &attributes->l2,
2601                 .mask = &attributes->l2_mask,
2602         };
2603         switch (fdir_filter->action.behavior) {
2604         case RTE_ETH_FDIR_ACCEPT:
2605                 attributes->actions[0] = (struct rte_flow_action){
2606                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2607                         .conf = &attributes->queue,
2608                 };
2609                 break;
2610         case RTE_ETH_FDIR_REJECT:
2611                 attributes->actions[0] = (struct rte_flow_action){
2612                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2613                 };
2614                 break;
2615         default:
2616                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2617                 return ENOTSUP;
2618         }
2619         attributes->queue.index = fdir_filter->action.rx_queue;
2620         switch (fdir_filter->input.flow_type) {
2621         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2622                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2623                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2624                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2625                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2626                         .type_of_service = input->flow.udp4_flow.ip.tos,
2627                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2628                 };
2629                 attributes->l4.udp.hdr = (struct udp_hdr){
2630                         .src_port = input->flow.udp4_flow.src_port,
2631                         .dst_port = input->flow.udp4_flow.dst_port,
2632                 };
2633                 attributes->items[1] = (struct rte_flow_item){
2634                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2635                         .spec = &attributes->l3,
2636                         .mask = &attributes->l3,
2637                 };
2638                 attributes->items[2] = (struct rte_flow_item){
2639                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2640                         .spec = &attributes->l4,
2641                         .mask = &attributes->l4,
2642                 };
2643                 break;
2644         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2645                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2646                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2647                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2648                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2649                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2650                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2651                 };
2652                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2653                         .src_port = input->flow.tcp4_flow.src_port,
2654                         .dst_port = input->flow.tcp4_flow.dst_port,
2655                 };
2656                 attributes->items[1] = (struct rte_flow_item){
2657                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2658                         .spec = &attributes->l3,
2659                         .mask = &attributes->l3,
2660                 };
2661                 attributes->items[2] = (struct rte_flow_item){
2662                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2663                         .spec = &attributes->l4,
2664                         .mask = &attributes->l4,
2665                 };
2666                 break;
2667         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2668                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2669                         .src_addr = input->flow.ip4_flow.src_ip,
2670                         .dst_addr = input->flow.ip4_flow.dst_ip,
2671                         .time_to_live = input->flow.ip4_flow.ttl,
2672                         .type_of_service = input->flow.ip4_flow.tos,
2673                         .next_proto_id = input->flow.ip4_flow.proto,
2674                 };
2675                 attributes->items[1] = (struct rte_flow_item){
2676                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2677                         .spec = &attributes->l3,
2678                         .mask = &attributes->l3,
2679                 };
2680                 break;
2681         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2682                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2683                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2684                         .proto = input->flow.udp6_flow.ip.proto,
2685                 };
2686                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2687                        input->flow.udp6_flow.ip.src_ip,
2688                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2689                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2690                        input->flow.udp6_flow.ip.dst_ip,
2691                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2692                 attributes->l4.udp.hdr = (struct udp_hdr){
2693                         .src_port = input->flow.udp6_flow.src_port,
2694                         .dst_port = input->flow.udp6_flow.dst_port,
2695                 };
2696                 attributes->items[1] = (struct rte_flow_item){
2697                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2698                         .spec = &attributes->l3,
2699                         .mask = &attributes->l3,
2700                 };
2701                 attributes->items[2] = (struct rte_flow_item){
2702                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2703                         .spec = &attributes->l4,
2704                         .mask = &attributes->l4,
2705                 };
2706                 break;
2707         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2708                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2709                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2710                         .proto = input->flow.tcp6_flow.ip.proto,
2711                 };
2712                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2713                        input->flow.tcp6_flow.ip.src_ip,
2714                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2715                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2716                        input->flow.tcp6_flow.ip.dst_ip,
2717                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2718                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2719                         .src_port = input->flow.tcp6_flow.src_port,
2720                         .dst_port = input->flow.tcp6_flow.dst_port,
2721                 };
2722                 attributes->items[1] = (struct rte_flow_item){
2723                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2724                         .spec = &attributes->l3,
2725                         .mask = &attributes->l3,
2726                 };
2727                 attributes->items[2] = (struct rte_flow_item){
2728                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2729                         .spec = &attributes->l4,
2730                         .mask = &attributes->l4,
2731                 };
2732                 break;
2733         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2734                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2735                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2736                         .proto = input->flow.ipv6_flow.proto,
2737                 };
2738                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2739                        input->flow.ipv6_flow.src_ip,
2740                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2741                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2742                        input->flow.ipv6_flow.dst_ip,
2743                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2744                 attributes->items[1] = (struct rte_flow_item){
2745                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2746                         .spec = &attributes->l3,
2747                         .mask = &attributes->l3,
2748                 };
2749                 break;
2750         default:
2751                 ERROR("invalid flow type %d",
2752                       fdir_filter->input.flow_type);
2753                 return ENOTSUP;
2754         }
2755         return 0;
2756 }
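
/*
 * Note: the conversion above produces the same kind of description an
 * application would hand to the generic flow API directly.  As a hedged
 * sketch (field values below are illustrative placeholders, not derived
 * from this driver), the item array built for an IPv4/UDP perfect filter
 * is equivalent to:
 *
 *	struct rte_flow_item_ipv4 ip_spec = {
 *		.hdr = {
 *			.src_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *			.dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 2)),
 *		},
 *	};
 *	struct rte_flow_item_udp udp_spec = {
 *		.hdr = {
 *			.src_port = rte_cpu_to_be_16(1024),
 *			.dst_port = rte_cpu_to_be_16(4789),
 *		},
 *	};
 *	struct rte_flow_item items[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *		  .spec = &ip_spec, .mask = &ip_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
 *		  .spec = &udp_spec, .mask = &udp_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Using the spec as its own mask mirrors what this function does: only
 * the bits actually set in the filter participate in matching.
 */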
2757
2758 /**
2759  * Add a new flow director filter and store it in the list.
2760  *
2761  * @param priv
2762  *   Private structure.
2763  * @param fdir_filter
2764  *   Flow director filter to add.
2765  *
2766  * @return
2767  *   0 on success, errno value on failure.
2768  */
2769 static int
2770 priv_fdir_filter_add(struct priv *priv,
2771                      const struct rte_eth_fdir_filter *fdir_filter)
2772 {
2773         struct mlx5_fdir attributes = {
2774                 .attr.group = 0,
2775                 .l2_mask = {
2776                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2777                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2778                         .type = 0,
2779                 },
2780         };
2781         struct mlx5_flow_parse parser = {
2782                 .layer = HASH_RXQ_ETH,
2783         };
2784         struct rte_flow_error error;
2785         struct rte_flow *flow;
2786         int ret;
2787
2788         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2789         if (ret)
2790                 return -ret;
2791         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2792                                 attributes.actions, &error, &parser);
2793         if (ret)
2794                 return -ret;
2795         flow = priv_flow_create(priv,
2796                                 &priv->flows,
2797                                 &attributes.attr,
2798                                 attributes.items,
2799                                 attributes.actions,
2800                                 &error);
2801         if (flow) {
2802                 DEBUG("FDIR created %p", (void *)flow);
2803                 return 0;
2804         }
2805         return ENOTSUP;
2806 }
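
/*
 * For context, this function is reached from an application through the
 * legacy filter control API.  A minimal, hedged usage sketch (port_id,
 * queue and addresses are placeholders):
 *
 *	struct rte_eth_fdir_filter f = {
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 1)),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *	};
 *	int rc = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *					 RTE_ETH_FILTER_ADD, &f);
 */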
2807
2808 /**
2809  * Delete specific filter.
2810  *
2811  * @param priv
2812  *   Private structure.
2813  * @param fdir_filter
2814  *   Filter to be deleted.
2815  *
2816  * @return
2817  *   0 on success, errno value on failure.
2818  */
2819 static int
2820 priv_fdir_filter_delete(struct priv *priv,
2821                         const struct rte_eth_fdir_filter *fdir_filter)
2822 {
2823         struct mlx5_fdir attributes = {
2824                 .attr.group = 0,
2825         };
2826         struct mlx5_flow_parse parser = {
2827                 .create = 1,
2828                 .layer = HASH_RXQ_ETH,
2829         };
2830         struct rte_flow_error error;
2831         struct rte_flow *flow;
2832         unsigned int i;
2833         int ret;
2834
2835         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2836         if (ret)
2837                 return -ret;
2838         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2839                                 attributes.actions, &error, &parser);
2840         if (ret)
2841                 goto exit;
2842         /*
2843          * Special case for the drop action, which is only appended to the
2844          * specifications when a flow is created.  At this point the parsed
2845          * attributes lack it, so add it manually before comparing.
2846          */
2847         if (parser.drop) {
2848                 struct ibv_flow_spec_action_drop *drop;
2849
2850                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2851                                 parser.queue[HASH_RXQ_ETH].offset);
2852                 *drop = (struct ibv_flow_spec_action_drop){
2853                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2854                         .size = sizeof(struct ibv_flow_spec_action_drop),
2855                 };
2856                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2857         }
2858         TAILQ_FOREACH(flow, &priv->flows, next) {
2859                 struct ibv_flow_attr *attr;
2860                 struct ibv_spec_header *attr_h;
2861                 void *spec;
2862                 struct ibv_flow_attr *flow_attr;
2863                 struct ibv_spec_header *flow_h;
2864                 void *flow_spec;
2865                 unsigned int specs_n;
2866
2867                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2868                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2869                 /* Compare the attributes first. */
2870                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2871                         continue;
2872                 if (attr->num_of_specs == 0)
2873                         continue;
2874                 spec = (void *)((uintptr_t)attr +
2875                                 sizeof(struct ibv_flow_attr));
2876                 flow_spec = (void *)((uintptr_t)flow_attr +
2877                                      sizeof(struct ibv_flow_attr));
2878                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2879                 for (i = 0; i != specs_n; ++i) {
2880                         attr_h = spec;
2881                         flow_h = flow_spec;
2882                         if (memcmp(spec, flow_spec,
2883                                    RTE_MIN(attr_h->size, flow_h->size)))
2884                                 goto wrong_flow;
2885                         spec = (void *)((uintptr_t)spec + attr_h->size);
2886                         flow_spec = (void *)((uintptr_t)flow_spec +
2887                                              flow_h->size);
2888                 }
2889                 /* At this point, the flows match. */
2890                 break;
2891 wrong_flow:
2892                 /* The flow does not match. */
2893                 continue;
2894         }
2895         if (flow)
2896                 priv_flow_destroy(priv, &priv->flows, flow);
2897 exit:
2898         for (i = 0; i != hash_rxq_init_n; ++i) {
2899                 if (parser.queue[i].ibv_attr)
2900                         rte_free(parser.queue[i].ibv_attr);
2901         }
2902         return -ret;
2903 }
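
/*
 * The matching loop above compares two Verbs attribute blobs: an
 * ibv_flow_attr header followed by num_of_specs variable-sized
 * specifications, each of which starts with an ibv_spec_header giving
 * its type and size.  The same walk, isolated as a hedged sketch
 * (specs_equal() is local to this example, not part of the driver):
 *
 *	static int
 *	specs_equal(const struct ibv_flow_attr *a,
 *		    const struct ibv_flow_attr *b)
 *	{
 *		const uint8_t *pa = (const uint8_t *)(a + 1);
 *		const uint8_t *pb = (const uint8_t *)(b + 1);
 *		unsigned int n = RTE_MIN(a->num_of_specs, b->num_of_specs);
 *		unsigned int i;
 *
 *		for (i = 0; i != n; ++i) {
 *			const struct ibv_spec_header *ha = (const void *)pa;
 *			const struct ibv_spec_header *hb = (const void *)pb;
 *
 *			// Compare up to the shorter spec, exactly as the
 *			// loop above does.
 *			if (memcmp(pa, pb, RTE_MIN(ha->size, hb->size)))
 *				return 0;
 *			pa += ha->size;
 *			pb += hb->size;
 *		}
 *		return 1;
 *	}
 */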
2904
2905 /**
2906  * Update an existing flow director filter.
2907  *
2908  * @param priv
2909  *   Private structure.
2910  * @param fdir_filter
2911  *   Filter to be updated.
2912  *
2913  * @return
2914  *   0 on success, errno value on failure.
2915  */
2916 static int
2917 priv_fdir_filter_update(struct priv *priv,
2918                         const struct rte_eth_fdir_filter *fdir_filter)
2919 {
2920         int ret;
2921
2922         ret = priv_fdir_filter_delete(priv, fdir_filter);
2923         if (ret)
2924                 return ret;
2925         ret = priv_fdir_filter_add(priv, fdir_filter);
2926         return ret;
2927 }
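
/*
 * Note that the update above is not atomic: if the add step fails after
 * a successful delete, the original filter is gone and the caller only
 * gets the add error code.
 */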
2928
2929 /**
2930  * Flush all filters.
2931  *
2932  * @param priv
2933  *   Private structure.
2934  */
2935 static void
2936 priv_fdir_filter_flush(struct priv *priv)
2937 {
2938         priv_flow_flush(priv, &priv->flows);
2939 }
2940
2941 /**
2942  * Get flow director information.
2943  *
2944  * @param priv
2945  *   Private structure.
2946  * @param[out] fdir_info
2947  *   Resulting flow director information.
2948  */
2949 static void
2950 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2951 {
2952         struct rte_eth_fdir_masks *mask =
2953                 &priv->dev->data->dev_conf.fdir_conf.mask;
2954
2955         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2956         fdir_info->guarant_spc = 0;
2957         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2958         fdir_info->max_flexpayload = 0;
2959         fdir_info->flow_types_mask[0] = 0;
2960         fdir_info->flex_payload_unit = 0;
2961         fdir_info->max_flex_payload_segment_num = 0;
2962         fdir_info->flex_payload_limit = 0;
2963         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2964 }
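
/*
 * Everything except the mode and the global mask is reported as zero
 * because this PMD has no flexible payload support and no guaranteed
 * filter space.  A hedged sketch of how an application reads this back
 * (port_id is a placeholder):
 *
 *	struct rte_eth_fdir_info info;
 *
 *	memset(&info, 0, sizeof(info));
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_INFO, &info);
 */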
2965
2966 /**
2967  * Deal with flow director operations.
2968  *
2969  * @param priv
2970  *   Pointer to private structure.
2971  * @param filter_op
2972  *   Operation to perform.
2973  * @param arg
2974  *   Pointer to operation-specific structure.
2975  *
2976  * @return
2977  *   0 on success, errno value on failure.
2978  */
2979 static int
2980 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2981 {
2982         enum rte_fdir_mode fdir_mode =
2983                 priv->dev->data->dev_conf.fdir_conf.mode;
2984         int ret = 0;
2985
2986         if (filter_op == RTE_ETH_FILTER_NOP)
2987                 return 0;
2988         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2989             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2990                 ERROR("%p: flow director mode %d not supported",
2991                       (void *)priv, fdir_mode);
2992                 return EINVAL;
2993         }
2994         switch (filter_op) {
2995         case RTE_ETH_FILTER_ADD:
2996                 ret = priv_fdir_filter_add(priv, arg);
2997                 break;
2998         case RTE_ETH_FILTER_UPDATE:
2999                 ret = priv_fdir_filter_update(priv, arg);
3000                 break;
3001         case RTE_ETH_FILTER_DELETE:
3002                 ret = priv_fdir_filter_delete(priv, arg);
3003                 break;
3004         case RTE_ETH_FILTER_FLUSH:
3005                 priv_fdir_filter_flush(priv);
3006                 break;
3007         case RTE_ETH_FILTER_INFO:
3008                 priv_fdir_info_get(priv, arg);
3009                 break;
3010         default:
3011                 DEBUG("%p: unknown operation %u", (void *)priv,
3012                       filter_op);
3013                 ret = EINVAL;
3014                 break;
3015         }
3016         return ret;
3017 }
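
/*
 * The dispatcher above only accepts the two perfect modes, so flow
 * director must be enabled at configuration time.  A hedged sketch of
 * the relevant port setup (all other fields and the queue counts are
 * placeholders):
 *
 *	struct rte_eth_conf conf = {
 *		.fdir_conf = {
 *			.mode = RTE_FDIR_MODE_PERFECT,
 *		},
 *	};
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */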
3018
3019 /**
3020  * Manage filter operations.
3021  *
3022  * @param dev
3023  *   Pointer to Ethernet device structure.
3024  * @param filter_type
3025  *   Filter type.
3026  * @param filter_op
3027  *   Operation to perform.
3028  * @param arg
3029  *   Pointer to operation-specific structure.
3030  *
3031  * @return
3032  *   0 on success, negative errno value on failure.
3033  */
3034 int
3035 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3036                      enum rte_filter_type filter_type,
3037                      enum rte_filter_op filter_op,
3038                      void *arg)
3039 {
3040         int ret = EINVAL;
3041         struct priv *priv = dev->data->dev_private;
3042
3043         switch (filter_type) {
3044         case RTE_ETH_FILTER_GENERIC:
3045                 if (filter_op != RTE_ETH_FILTER_GET)
3046                         return -EINVAL;
3047                 *(const void **)arg = &mlx5_flow_ops;
3048                 return 0;
3049         case RTE_ETH_FILTER_FDIR:
3050                 priv_lock(priv);
3051                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3052                 priv_unlock(priv);
3053                 break;
3054         default:
3055                 ERROR("%p: filter type (%d) not supported",
3056                       (void *)dev, filter_type);
3057                 break;
3058         }
3059         return -ret;
3060 }
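
/*
 * The RTE_ETH_FILTER_GENERIC branch above is how the rte_flow layer
 * discovers this PMD's flow operations.  A hedged sketch of the query
 * (this is what librte_ether performs internally; port_id is a
 * placeholder):
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */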