1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox.
4  */
5
6 #include <sys/queue.h>
7 #include <string.h>
8
9 /* Verbs header. */
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
11 #ifdef PEDANTIC
12 #pragma GCC diagnostic ignored "-Wpedantic"
13 #endif
14 #include <infiniband/verbs.h>
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic error "-Wpedantic"
17 #endif
18
19 #include <rte_ethdev_driver.h>
20 #include <rte_flow.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
23 #include <rte_ip.h>
24
25 #include "mlx5.h"
26 #include "mlx5_defs.h"
27 #include "mlx5_prm.h"
28 #include "mlx5_glue.h"
29
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
32
33 /* Internet Protocol versions. */
34 #define MLX5_IPV4 4
35 #define MLX5_IPV6 6
36
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39         int dummy;
40 };
41 #endif
42
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
46
47 static int
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49                      const void *default_mask,
50                      void *data);
51
52 static int
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54                       const void *default_mask,
55                       void *data);
56
57 static int
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59                       const void *default_mask,
60                       void *data);
61
62 static int
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64                       const void *default_mask,
65                       void *data);
66
67 static int
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69                      const void *default_mask,
70                      void *data);
71
72 static int
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74                      const void *default_mask,
75                      void *data);
76
77 static int
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79                        const void *default_mask,
80                        void *data);
81
82 struct mlx5_flow_parse;
83
84 static void
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
86                       unsigned int size);
87
88 static int
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
90
91 static int
92 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
93
94 /* Hash RX queue types. */
95 enum hash_rxq_type {
96         HASH_RXQ_TCPV4,
97         HASH_RXQ_UDPV4,
98         HASH_RXQ_IPV4,
99         HASH_RXQ_TCPV6,
100         HASH_RXQ_UDPV6,
101         HASH_RXQ_IPV6,
102         HASH_RXQ_ETH,
103 };
104
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107         uint64_t hash_fields; /* Fields that participate in the hash. */
108         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109         unsigned int flow_priority; /* Flow priority to use. */
110         unsigned int ip_version; /* Internet protocol. */
111 };
112
113 /* Initialization data for hash RX queues. */
114 const struct hash_rxq_init hash_rxq_init[] = {
115         [HASH_RXQ_TCPV4] = {
116                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117                                 IBV_RX_HASH_DST_IPV4 |
118                                 IBV_RX_HASH_SRC_PORT_TCP |
119                                 IBV_RX_HASH_DST_PORT_TCP),
120                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
121                 .flow_priority = 0,
122                 .ip_version = MLX5_IPV4,
123         },
124         [HASH_RXQ_UDPV4] = {
125                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126                                 IBV_RX_HASH_DST_IPV4 |
127                                 IBV_RX_HASH_SRC_PORT_UDP |
128                                 IBV_RX_HASH_DST_PORT_UDP),
129                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
130                 .flow_priority = 0,
131                 .ip_version = MLX5_IPV4,
132         },
133         [HASH_RXQ_IPV4] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135                                 IBV_RX_HASH_DST_IPV4),
136                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
137                                 ETH_RSS_FRAG_IPV4),
138                 .flow_priority = 1,
139                 .ip_version = MLX5_IPV4,
140         },
141         [HASH_RXQ_TCPV6] = {
142                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143                                 IBV_RX_HASH_DST_IPV6 |
144                                 IBV_RX_HASH_SRC_PORT_TCP |
145                                 IBV_RX_HASH_DST_PORT_TCP),
146                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
147                 .flow_priority = 0,
148                 .ip_version = MLX5_IPV6,
149         },
150         [HASH_RXQ_UDPV6] = {
151                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152                                 IBV_RX_HASH_DST_IPV6 |
153                                 IBV_RX_HASH_SRC_PORT_UDP |
154                                 IBV_RX_HASH_DST_PORT_UDP),
155                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
156                 .flow_priority = 0,
157                 .ip_version = MLX5_IPV6,
158         },
159         [HASH_RXQ_IPV6] = {
160                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161                                 IBV_RX_HASH_DST_IPV6),
162                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
163                                 ETH_RSS_FRAG_IPV6),
164                 .flow_priority = 1,
165                 .ip_version = MLX5_IPV6,
166         },
167         [HASH_RXQ_ETH] = {
168                 .hash_fields = 0,
169                 .dpdk_rss_hf = 0,
170                 .flow_priority = 2,
171         },
172 };
173
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
176
177 /** Structure for holding counter stats. */
178 struct mlx5_flow_counter_stats {
179         uint64_t hits; /**< Number of packets matched by the rule. */
180         uint64_t bytes; /**< Number of bytes matched by the rule. */
181 };
182
183 /** Structure for Drop queue. */
184 struct mlx5_hrxq_drop {
185         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186         struct ibv_qp *qp; /**< Verbs queue pair. */
187         struct ibv_wq *wq; /**< Verbs work queue. */
188         struct ibv_cq *cq; /**< Verbs completion queue. */
189 };
190
191 /* Flow structures. */
192 struct mlx5_flow {
193         uint64_t hash_fields; /**< Fields that participate in the hash. */
194         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195         struct ibv_flow *ibv_flow; /**< Verbs flow. */
196         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
197 };
198
199 /* Drop flow structures. */
200 struct mlx5_flow_drop {
201         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202         struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 };
204
205 struct rte_flow {
206         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207         uint32_t mark:1; /**< Set if the flow is marked. */
208         uint32_t drop:1; /**< Drop queue. */
209         uint16_t queues_n; /**< Number of entries in queues[]. */
210         uint16_t (*queues)[]; /**< Queue indexes to use. */
211         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
212         uint8_t rss_key[40]; /**< Copy of the RSS key. */
213         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214         struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
215         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216         /**< Flow with Rx queue. */
217 };
218
219 /** Static initializer for items. */
220 #define ITEMS(...) \
221         (const enum rte_flow_item_type []){ \
222                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
223         }
224
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227         /** List of possible actions for these items. */
228         const enum rte_flow_action_type *const actions;
229         /** Bit-masks corresponding to the possibilities for the item. */
230         const void *mask;
231         /**
232          * Default bit-masks to use when item->mask is not provided. When
233          * \default_mask is also NULL, the full supported bit-mask (\mask) is
234          * used instead.
235          */
236         const void *default_mask;
237         /** Bit-masks size in bytes. */
238         const unsigned int mask_sz;
239         /**
240          * Conversion function from rte_flow to NIC specific flow.
241          *
242          * @param item
243          *   rte_flow item to convert.
244          * @param default_mask
245          *   Default bit-masks to use when item->mask is not provided.
246          * @param data
247          *   Internal structure to store the conversion.
248          *
249          * @return
250          *   0 on success, negative value otherwise.
251          */
252         int (*convert)(const struct rte_flow_item *item,
253                        const void *default_mask,
254                        void *data);
255         /** Size in bytes of the destination structure. */
256         const unsigned int dst_sz;
257         /** List of possible following items.  */
258         const enum rte_flow_item_type *const items;
259 };
260
261 /** Valid actions for this PMD. */
262 static const enum rte_flow_action_type valid_actions[] = {
263         RTE_FLOW_ACTION_TYPE_DROP,
264         RTE_FLOW_ACTION_TYPE_QUEUE,
265         RTE_FLOW_ACTION_TYPE_MARK,
266         RTE_FLOW_ACTION_TYPE_FLAG,
267 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
268         RTE_FLOW_ACTION_TYPE_COUNT,
269 #endif
270         RTE_FLOW_ACTION_TYPE_END,
271 };
272
273 /** Graph of supported items and associated actions. */
274 static const struct mlx5_flow_items mlx5_flow_items[] = {
275         [RTE_FLOW_ITEM_TYPE_END] = {
276                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
277                                RTE_FLOW_ITEM_TYPE_VXLAN),
278         },
279         [RTE_FLOW_ITEM_TYPE_ETH] = {
280                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
281                                RTE_FLOW_ITEM_TYPE_IPV4,
282                                RTE_FLOW_ITEM_TYPE_IPV6),
283                 .actions = valid_actions,
284                 .mask = &(const struct rte_flow_item_eth){
285                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
286                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287                         .type = -1,
288                 },
289                 .default_mask = &rte_flow_item_eth_mask,
290                 .mask_sz = sizeof(struct rte_flow_item_eth),
291                 .convert = mlx5_flow_create_eth,
292                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
293         },
294         [RTE_FLOW_ITEM_TYPE_VLAN] = {
295                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
296                                RTE_FLOW_ITEM_TYPE_IPV6),
297                 .actions = valid_actions,
298                 .mask = &(const struct rte_flow_item_vlan){
299                         .tci = -1,
300                 },
301                 .default_mask = &rte_flow_item_vlan_mask,
302                 .mask_sz = sizeof(struct rte_flow_item_vlan),
303                 .convert = mlx5_flow_create_vlan,
304                 .dst_sz = 0,
305         },
306         [RTE_FLOW_ITEM_TYPE_IPV4] = {
307                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
308                                RTE_FLOW_ITEM_TYPE_TCP),
309                 .actions = valid_actions,
310                 .mask = &(const struct rte_flow_item_ipv4){
311                         .hdr = {
312                                 .src_addr = -1,
313                                 .dst_addr = -1,
314                                 .type_of_service = -1,
315                                 .next_proto_id = -1,
316                         },
317                 },
318                 .default_mask = &rte_flow_item_ipv4_mask,
319                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
320                 .convert = mlx5_flow_create_ipv4,
321                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
322         },
323         [RTE_FLOW_ITEM_TYPE_IPV6] = {
324                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
325                                RTE_FLOW_ITEM_TYPE_TCP),
326                 .actions = valid_actions,
327                 .mask = &(const struct rte_flow_item_ipv6){
328                         .hdr = {
329                                 .src_addr = {
330                                         0xff, 0xff, 0xff, 0xff,
331                                         0xff, 0xff, 0xff, 0xff,
332                                         0xff, 0xff, 0xff, 0xff,
333                                         0xff, 0xff, 0xff, 0xff,
334                                 },
335                                 .dst_addr = {
336                                         0xff, 0xff, 0xff, 0xff,
337                                         0xff, 0xff, 0xff, 0xff,
338                                         0xff, 0xff, 0xff, 0xff,
339                                         0xff, 0xff, 0xff, 0xff,
340                                 },
341                                 .vtc_flow = -1,
342                                 .proto = -1,
343                                 .hop_limits = -1,
344                         },
345                 },
346                 .default_mask = &rte_flow_item_ipv6_mask,
347                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
348                 .convert = mlx5_flow_create_ipv6,
349                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
350         },
351         [RTE_FLOW_ITEM_TYPE_UDP] = {
352                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
353                 .actions = valid_actions,
354                 .mask = &(const struct rte_flow_item_udp){
355                         .hdr = {
356                                 .src_port = -1,
357                                 .dst_port = -1,
358                         },
359                 },
360                 .default_mask = &rte_flow_item_udp_mask,
361                 .mask_sz = sizeof(struct rte_flow_item_udp),
362                 .convert = mlx5_flow_create_udp,
363                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
364         },
365         [RTE_FLOW_ITEM_TYPE_TCP] = {
366                 .actions = valid_actions,
367                 .mask = &(const struct rte_flow_item_tcp){
368                         .hdr = {
369                                 .src_port = -1,
370                                 .dst_port = -1,
371                         },
372                 },
373                 .default_mask = &rte_flow_item_tcp_mask,
374                 .mask_sz = sizeof(struct rte_flow_item_tcp),
375                 .convert = mlx5_flow_create_tcp,
376                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
377         },
378         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
379                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
380                 .actions = valid_actions,
381                 .mask = &(const struct rte_flow_item_vxlan){
382                         .vni = "\xff\xff\xff",
383                 },
384                 .default_mask = &rte_flow_item_vxlan_mask,
385                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
386                 .convert = mlx5_flow_create_vxlan,
387                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
388         },
389 };
390
391 /** Structure to pass to the conversion function. */
392 struct mlx5_flow_parse {
393         uint32_t inner; /**< Set once VXLAN is encountered. */
394         uint32_t create:1;
395         /**< Whether resources should remain after a validate. */
396         uint32_t drop:1; /**< Target is a drop queue. */
397         uint32_t mark:1; /**< Mark is present in the flow. */
398         uint32_t count:1; /**< Count is present in the flow. */
399         uint32_t mark_id; /**< Mark identifier. */
400         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
401         uint16_t queues_n; /**< Number of entries in queues[]. */
402         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
403         uint8_t rss_key[40]; /**< Copy of the RSS key. */
404         enum hash_rxq_type layer; /**< Last pattern layer detected. */
405         struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
406         struct {
407                 struct ibv_flow_attr *ibv_attr;
408                 /**< Pointer to Verbs attributes. */
409                 unsigned int offset;
410                 /**< Current position or total size of the attribute. */
411         } queue[RTE_DIM(hash_rxq_init)];
412 };
413
414 static const struct rte_flow_ops mlx5_flow_ops = {
415         .validate = mlx5_flow_validate,
416         .create = mlx5_flow_create,
417         .destroy = mlx5_flow_destroy,
418         .flush = mlx5_flow_flush,
419 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
420         .query = mlx5_flow_query,
421 #else
422         .query = NULL,
423 #endif
424         .isolate = mlx5_flow_isolate,
425 };
426
427 /* Convert FDIR request to Generic flow. */
428 struct mlx5_fdir {
429         struct rte_flow_attr attr;
430         struct rte_flow_action actions[2];
431         struct rte_flow_item items[4];
432         struct rte_flow_item_eth l2;
433         struct rte_flow_item_eth l2_mask;
434         union {
435                 struct rte_flow_item_ipv4 ipv4;
436                 struct rte_flow_item_ipv6 ipv6;
437         } l3;
438         union {
439                 struct rte_flow_item_udp udp;
440                 struct rte_flow_item_tcp tcp;
441         } l4;
442         struct rte_flow_action_queue queue;
443 };
444
445 /* Verbs specification header. */
446 struct ibv_spec_header {
447         enum ibv_flow_spec_type type;
448         uint16_t size;
449 };
450
451 /**
452  * Check support for a given item.
453  *
454  * @param item[in]
455  *   Item specification.
456  * @param mask[in]
457  *   Bit-masks covering supported fields to compare with spec, last and mask in
458  *   \item.
459  * @param size
460  *   Bit-Mask size in bytes.
461  *
462  * @return
463  *   0 on success.
464  */
465 static int
466 mlx5_flow_item_validate(const struct rte_flow_item *item,
467                         const uint8_t *mask, unsigned int size)
468 {
469         int ret = 0;
470
471         if (!item->spec && (item->mask || item->last))
472                 return -1;
473         if (item->spec && !item->mask) {
474                 unsigned int i;
475                 const uint8_t *spec = item->spec;
476
477                 for (i = 0; i < size; ++i)
478                         if ((spec[i] | mask[i]) != mask[i])
479                                 return -1;
480         }
481         if (item->last && !item->mask) {
482                 unsigned int i;
483                 const uint8_t *spec = item->last;
484
485                 for (i = 0; i < size; ++i)
486                         if ((spec[i] | mask[i]) != mask[i])
487                                 return -1;
488         }
489         if (item->mask) {
490                 unsigned int i;
491                 const uint8_t *spec = item->spec;
492
493                 for (i = 0; i < size; ++i)
494                         if ((spec[i] | mask[i]) != mask[i])
495                                 return -1;
496         }
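        /*
         * Ranges are not really supported: the check below only succeeds
         * when spec and last designate the same value once the mask has
         * been applied.
         */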
497         if (item->spec && item->last) {
498                 uint8_t spec[size];
499                 uint8_t last[size];
500                 const uint8_t *apply = mask;
501                 unsigned int i;
502
503                 if (item->mask)
504                         apply = item->mask;
505                 for (i = 0; i < size; ++i) {
506                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
507                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
508                 }
509                 ret = memcmp(spec, last, size);
510         }
511         return ret;
512 }
513
514 /**
515  * Copy the user RSS configuration; when rss_conf is NULL, keep the
516  * driver default configuration.
517  *
518  * @param parser
519  *   Internal parser structure.
520  * @param rss_conf
521  *   User RSS configuration to save.
522  *
523  * @return
524  *   0 on success, errno value on failure.
525  */
526 static int
527 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
528                            const struct rte_eth_rss_conf *rss_conf)
529 {
530         /*
531          * This function is also called at the beginning of
532          * mlx5_flow_convert_actions() to initialize the parser with the
533          * device default RSS configuration.
534          */
535         if (rss_conf) {
536                 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
537                         return EINVAL;
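                /* Only a 40-byte RSS hash key is supported. */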
538                 if (rss_conf->rss_key_len != 40)
539                         return EINVAL;
540                 if (rss_conf->rss_key_len && rss_conf->rss_key) {
541                         parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
542                         memcpy(parser->rss_key, rss_conf->rss_key,
543                                rss_conf->rss_key_len);
544                         parser->rss_conf.rss_key = parser->rss_key;
545                 }
546                 parser->rss_conf.rss_hf = rss_conf->rss_hf;
547         }
548         return 0;
549 }
550
551 /**
552  * Validate flow rule attributes.
553  *
554  * @param[in] attr
555  *   Flow rule attributes.
556  * @param[out] error
557  *   Perform verbose error reporting if not NULL.
558  *
559  * @return
560  *   0 on success, a negative errno value otherwise and rte_errno is set.
561  */
562 static int
563 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
564                              struct rte_flow_error *error)
565 {
566         if (attr->group) {
567                 rte_flow_error_set(error, ENOTSUP,
568                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
569                                    NULL,
570                                    "groups are not supported");
571                 return -rte_errno;
572         }
573         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
574                 rte_flow_error_set(error, ENOTSUP,
575                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
576                                    NULL,
577                                    "priorities are not supported");
578                 return -rte_errno;
579         }
580         if (attr->egress) {
581                 rte_flow_error_set(error, ENOTSUP,
582                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
583                                    NULL,
584                                    "egress is not supported");
585                 return -rte_errno;
586         }
587         if (!attr->ingress) {
588                 rte_flow_error_set(error, ENOTSUP,
589                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
590                                    NULL,
591                                    "only ingress is supported");
592                 return -rte_errno;
593         }
594         return 0;
595 }
596
597 /**
598  * Extract the actions from the request into the parser.
599  *
600  * @param dev
601  *   Pointer to Ethernet device.
602  * @param[in] actions
603  *   Associated actions (list terminated by the END action).
604  * @param[out] error
605  *   Perform verbose error reporting if not NULL.
606  * @param[in, out] parser
607  *   Internal parser structure.
608  *
609  * @return
610  *   0 on success, a negative errno value otherwise and rte_errno is set.
611  */
612 static int
613 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
614                           const struct rte_flow_action actions[],
615                           struct rte_flow_error *error,
616                           struct mlx5_flow_parse *parser)
617 {
618         struct priv *priv = dev->data->dev_private;
619
620         /*
621          * Add the default RSS configuration required by Verbs to create a
622          * queue pair even when no RSS action is requested.
623          */
624         mlx5_flow_convert_rss_conf(parser,
625                                    (const struct rte_eth_rss_conf *)
626                                    &priv->rss_conf);
627         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
628                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
629                         continue;
630                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
631                         parser->drop = 1;
632                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
633                         const struct rte_flow_action_queue *queue =
634                                 (const struct rte_flow_action_queue *)
635                                 actions->conf;
636                         uint16_t n;
637                         uint16_t found = 0;
638
639                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
640                                 goto exit_action_not_supported;
641                         for (n = 0; n < parser->queues_n; ++n) {
642                                 if (parser->queues[n] == queue->index) {
643                                         found = 1;
644                                         break;
645                                 }
646                         }
647                         if (parser->queues_n > 1 && !found) {
648                                 rte_flow_error_set(error, ENOTSUP,
649                                            RTE_FLOW_ERROR_TYPE_ACTION,
650                                            actions,
651                                            "queue action not in RSS queues");
652                                 return -rte_errno;
653                         }
654                         if (!found) {
655                                 parser->queues_n = 1;
656                                 parser->queues[0] = queue->index;
657                         }
658                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
659                         const struct rte_flow_action_rss *rss =
660                                 (const struct rte_flow_action_rss *)
661                                 actions->conf;
662                         uint16_t n;
663
664                         if (!rss || !rss->num) {
665                                 rte_flow_error_set(error, EINVAL,
666                                                    RTE_FLOW_ERROR_TYPE_ACTION,
667                                                    actions,
668                                                    "no valid queues");
669                                 return -rte_errno;
670                         }
671                         if (parser->queues_n == 1) {
672                                 uint16_t found = 0;
673
674                                 assert(parser->queues_n);
675                                 for (n = 0; n < rss->num; ++n) {
676                                         if (parser->queues[0] ==
677                                             rss->queue[n]) {
678                                                 found = 1;
679                                                 break;
680                                         }
681                                 }
682                                 if (!found) {
683                                         rte_flow_error_set(error, ENOTSUP,
684                                                    RTE_FLOW_ERROR_TYPE_ACTION,
685                                                    actions,
686                                                    "queue action not in RSS"
687                                                    " queues");
688                                         return -rte_errno;
689                                 }
690                         }
691                         for (n = 0; n < rss->num; ++n) {
692                                 if (rss->queue[n] >= priv->rxqs_n) {
693                                         rte_flow_error_set(error, EINVAL,
694                                                    RTE_FLOW_ERROR_TYPE_ACTION,
695                                                    actions,
696                                                    "queue id > number of"
697                                                    " queues");
698                                         return -rte_errno;
699                                 }
700                         }
701                         for (n = 0; n < rss->num; ++n)
702                                 parser->queues[n] = rss->queue[n];
703                         parser->queues_n = rss->num;
704                         if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
705                                 rte_flow_error_set(error, EINVAL,
706                                                    RTE_FLOW_ERROR_TYPE_ACTION,
707                                                    actions,
708                                                    "wrong RSS configuration");
709                                 return -rte_errno;
710                         }
711                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
712                         const struct rte_flow_action_mark *mark =
713                                 (const struct rte_flow_action_mark *)
714                                 actions->conf;
715
716                         if (!mark) {
717                                 rte_flow_error_set(error, EINVAL,
718                                                    RTE_FLOW_ERROR_TYPE_ACTION,
719                                                    actions,
720                                                    "mark must be defined");
721                                 return -rte_errno;
722                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
723                                 rte_flow_error_set(error, ENOTSUP,
724                                                    RTE_FLOW_ERROR_TYPE_ACTION,
725                                                    actions,
726                                                    "mark must be between 0"
727                                                    " and 16777199");
728                                 return -rte_errno;
729                         }
730                         parser->mark = 1;
731                         parser->mark_id = mark->id;
732                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
733                         parser->mark = 1;
734                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
735                            priv->config.flow_counter_en) {
736                         parser->count = 1;
737                 } else {
738                         goto exit_action_not_supported;
739                 }
740         }
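        /*
         * Marking has no effect on packets that are dropped, ignore the
         * MARK action in that case.
         */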
741         if (parser->drop && parser->mark)
742                 parser->mark = 0;
743         if (!parser->queues_n && !parser->drop) {
744                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
745                                    NULL, "no valid action");
746                 return -rte_errno;
747         }
748         return 0;
749 exit_action_not_supported:
750         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
751                            actions, "action not supported");
752         return -rte_errno;
753 }
754
755 /**
756  * Validate items.
757  *
758  * @param[in] items
759  *   Pattern specification (list terminated by the END pattern item).
760  * @param[out] error
761  *   Perform verbose error reporting if not NULL.
762  * @param[in, out] parser
763  *   Internal parser structure.
764  *
765  * @return
766  *   0 on success, a negative errno value otherwise and rte_errno is set.
767  */
768 static int
769 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
770                                  struct rte_flow_error *error,
771                                  struct mlx5_flow_parse *parser)
772 {
773         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
774         unsigned int i;
775
776         /* Initialise the offsets to start after verbs attribute. */
777         for (i = 0; i != hash_rxq_init_n; ++i)
778                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
779         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
780                 const struct mlx5_flow_items *token = NULL;
781                 unsigned int n;
782                 int err;
783
784                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
785                         continue;
786                 for (i = 0;
787                      cur_item->items &&
788                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
789                      ++i) {
790                         if (cur_item->items[i] == items->type) {
791                                 token = &mlx5_flow_items[items->type];
792                                 break;
793                         }
794                 }
795                 if (!token)
796                         goto exit_item_not_supported;
797                 cur_item = token;
798                 err = mlx5_flow_item_validate(items,
799                                               (const uint8_t *)cur_item->mask,
800                                               cur_item->mask_sz);
801                 if (err)
802                         goto exit_item_not_supported;
803                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
804                         if (parser->inner) {
805                                 rte_flow_error_set(error, ENOTSUP,
806                                                    RTE_FLOW_ERROR_TYPE_ITEM,
807                                                    items,
808                                                    "cannot recognize multiple"
809                                                    " VXLAN encapsulations");
810                                 return -rte_errno;
811                         }
812                         parser->inner = IBV_FLOW_SPEC_INNER;
813                 }
814                 if (parser->drop) {
815                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
816                 } else {
817                         for (n = 0; n != hash_rxq_init_n; ++n)
818                                 parser->queue[n].offset += cur_item->dst_sz;
819                 }
820         }
821         if (parser->drop) {
822                 parser->queue[HASH_RXQ_ETH].offset +=
823                         sizeof(struct ibv_flow_spec_action_drop);
824         }
825         if (parser->mark) {
826                 for (i = 0; i != hash_rxq_init_n; ++i)
827                         parser->queue[i].offset +=
828                                 sizeof(struct ibv_flow_spec_action_tag);
829         }
830         if (parser->count) {
831                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
832
833                 for (i = 0; i != hash_rxq_init_n; ++i)
834                         parser->queue[i].offset += size;
835         }
836         return 0;
837 exit_item_not_supported:
838         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
839                            items, "item not supported");
840         return -rte_errno;
841 }
842
843 /**
844  * Allocate memory space to store verbs flow attributes.
845  *
846  * @param[in] priority
847  *   Flow priority.
848  * @param[in] size
849  *   Amount of byte to allocate.
850  * @param[out] error
851  *   Perform verbose error reporting if not NULL.
852  *
853  * @return
854  *   A verbs flow attribute on success, NULL otherwise.
855  */
856 static struct ibv_flow_attr *
857 mlx5_flow_convert_allocate(unsigned int priority,
858                            unsigned int size,
859                            struct rte_flow_error *error)
860 {
861         struct ibv_flow_attr *ibv_attr;
862
863         ibv_attr = rte_calloc(__func__, 1, size, 0);
864         if (!ibv_attr) {
865                 rte_flow_error_set(error, ENOMEM,
866                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
867                                    NULL,
868                                    "cannot allocate verbs spec attributes.");
869                 return NULL;
870         }
871         ibv_attr->priority = priority;
872         return ibv_attr;
873 }
874
875 /**
876  * Finalise verbs flow attributes.
877  *
878  * @param[in, out] parser
879  *   Internal parser structure.
880  */
881 static void
882 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
883 {
884         const unsigned int ipv4 =
885                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
886         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
887         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
888         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
889         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
890         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
891         unsigned int i;
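        /*
         * hmin..hmax spans the hash Rx queue types matching the pattern's
         * IP version (e.g. TCPv4..IPv4) while ohmin..ohmax spans the
         * opposite IP version, whose specifications are removed below.
         */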
892
893         /* Remove any other flow not matching the pattern. */
894         if (parser->queues_n == 1) {
895                 for (i = 0; i != hash_rxq_init_n; ++i) {
896                         if (i == HASH_RXQ_ETH)
897                                 continue;
898                         rte_free(parser->queue[i].ibv_attr);
899                         parser->queue[i].ibv_attr = NULL;
900                 }
901                 return;
902         }
903         if (parser->layer == HASH_RXQ_ETH) {
904                 goto fill;
905         } else {
906                 /*
907                  * The Ethernet layer is no longer needed as the pattern
908                  * defines deeper layers.
909                  */
910                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
911                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
912         }
913         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
914         for (i = ohmin; i != (ohmax + 1); ++i) {
915                 if (!parser->queue[i].ibv_attr)
916                         continue;
917                 rte_free(parser->queue[i].ibv_attr);
918                 parser->queue[i].ibv_attr = NULL;
919         }
920         /* Remove impossible flow according to the RSS configuration. */
921         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
922             parser->rss_conf.rss_hf) {
923                 /* Remove any other flow. */
924                 for (i = hmin; i != (hmax + 1); ++i) {
925                         if ((i == parser->layer) ||
926                              (!parser->queue[i].ibv_attr))
927                                 continue;
928                         rte_free(parser->queue[i].ibv_attr);
929                         parser->queue[i].ibv_attr = NULL;
930                 }
931         } else if (!parser->queue[ip].ibv_attr) {
932                 /* No RSS possible with the current configuration. */
933                 parser->queues_n = 1;
934                 return;
935         }
936 fill:
937         /*
938          * Fill missing layers in the Verbs specifications or, on the first
939          * pass, only compute the correct offsets needed to allocate the
940          * memory space for the attributes and specifications.
941          */
942         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
943                 union {
944                         struct ibv_flow_spec_ipv4_ext ipv4;
945                         struct ibv_flow_spec_ipv6 ipv6;
946                         struct ibv_flow_spec_tcp_udp udp_tcp;
947                 } specs;
948                 void *dst;
949                 uint16_t size;
950
951                 if (i == parser->layer)
952                         continue;
953                 if (parser->layer == HASH_RXQ_ETH) {
954                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
955                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
956                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
957                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
958                                         .size = size,
959                                 };
960                         } else {
961                                 size = sizeof(struct ibv_flow_spec_ipv6);
962                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
963                                         .type = IBV_FLOW_SPEC_IPV6,
964                                         .size = size,
965                                 };
966                         }
967                         if (parser->queue[i].ibv_attr) {
968                                 dst = (void *)((uintptr_t)
969                                                parser->queue[i].ibv_attr +
970                                                parser->queue[i].offset);
971                                 memcpy(dst, &specs, size);
972                                 ++parser->queue[i].ibv_attr->num_of_specs;
973                         }
974                         parser->queue[i].offset += size;
975                 }
976                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
977                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
978                         size = sizeof(struct ibv_flow_spec_tcp_udp);
979                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
980                                 .type = ((i == HASH_RXQ_UDPV4 ||
981                                           i == HASH_RXQ_UDPV6) ?
982                                          IBV_FLOW_SPEC_UDP :
983                                          IBV_FLOW_SPEC_TCP),
984                                 .size = size,
985                         };
986                         if (parser->queue[i].ibv_attr) {
987                                 dst = (void *)((uintptr_t)
988                                                parser->queue[i].ibv_attr +
989                                                parser->queue[i].offset);
990                                 memcpy(dst, &specs, size);
991                                 ++parser->queue[i].ibv_attr->num_of_specs;
992                         }
993                         parser->queue[i].offset += size;
994                 }
995         }
996 }
997
998 /**
999  * Validate and convert a flow supported by the NIC.
1000  *
1001  * @param dev
1002  *   Pointer to Ethernet device.
1003  * @param[in] attr
1004  *   Flow rule attributes.
1005  * @param[in] pattern
1006  *   Pattern specification (list terminated by the END pattern item).
1007  * @param[in] actions
1008  *   Associated actions (list terminated by the END action).
1009  * @param[out] error
1010  *   Perform verbose error reporting if not NULL.
1011  * @param[in, out] parser
1012  *   Internal parser structure.
1013  *
1014  * @return
1015  *   0 on success, a negative errno value otherwise and rte_errno is set.
1016  */
1017 static int
1018 mlx5_flow_convert(struct rte_eth_dev *dev,
1019                   const struct rte_flow_attr *attr,
1020                   const struct rte_flow_item items[],
1021                   const struct rte_flow_action actions[],
1022                   struct rte_flow_error *error,
1023                   struct mlx5_flow_parse *parser)
1024 {
1025         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1026         unsigned int i;
1027         int ret;
1028
1029         /* First step. Validate the attributes, items and actions. */
1030         *parser = (struct mlx5_flow_parse){
1031                 .create = parser->create,
1032                 .layer = HASH_RXQ_ETH,
1033                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1034         };
1035         ret = mlx5_flow_convert_attributes(attr, error);
1036         if (ret)
1037                 return ret;
1038         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1039         if (ret)
1040                 return ret;
1041         ret = mlx5_flow_convert_items_validate(items, error, parser);
1042         if (ret)
1043                 return ret;
1044         mlx5_flow_convert_finalise(parser);
1045         /*
1046          * Second step.
1047          * Allocate the memory space to store verbs specifications.
1048          */
1049         if (parser->drop) {
1050                 unsigned int priority =
1051                         attr->priority +
1052                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1053                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1054
1055                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1056                         mlx5_flow_convert_allocate(priority, offset, error);
1057                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1058                         return ENOMEM;
1059                 parser->queue[HASH_RXQ_ETH].offset =
1060                         sizeof(struct ibv_flow_attr);
1061         } else {
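                /*
                 * Per-layer priority: flow_priority is 0 for L4 hash types,
                 * 1 for L3 and 2 for L2 (see hash_rxq_init[]), added on top
                 * of the user supplied priority.
                 */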
1062                 for (i = 0; i != hash_rxq_init_n; ++i) {
1063                         unsigned int priority =
1064                                 attr->priority +
1065                                 hash_rxq_init[i].flow_priority;
1066                         unsigned int offset;
1067
1068                         if (!(parser->rss_conf.rss_hf &
1069                               hash_rxq_init[i].dpdk_rss_hf) &&
1070                             (i != HASH_RXQ_ETH))
1071                                 continue;
1072                         offset = parser->queue[i].offset;
1073                         parser->queue[i].ibv_attr =
1074                                 mlx5_flow_convert_allocate(priority,
1075                                                            offset, error);
1076                         if (!parser->queue[i].ibv_attr)
1077                                 goto exit_enomem;
1078                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1079                 }
1080         }
1081         /* Third step. Conversion parse, fill the specifications. */
1082         parser->inner = 0;
1083         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1084                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1085                         continue;
1086                 cur_item = &mlx5_flow_items[items->type];
1087                 ret = cur_item->convert(items,
1088                                         (cur_item->default_mask ?
1089                                          cur_item->default_mask :
1090                                          cur_item->mask),
1091                                         parser);
1092                 if (ret) {
1093                         rte_flow_error_set(error, ret,
1094                                            RTE_FLOW_ERROR_TYPE_ITEM,
1095                                            items, "item not supported");
1096                         goto exit_free;
1097                 }
1098         }
1099         if (parser->mark)
1100                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1101         if (parser->count && parser->create) {
1102                 mlx5_flow_create_count(dev, parser);
1103                 if (!parser->cs)
1104                         goto exit_count_error;
1105         }
1106         /*
1107          * Last step. Complete missing specification to reach the RSS
1108          * configuration.
1109          */
1110         if (!parser->drop) {
1111                 mlx5_flow_convert_finalise(parser);
1112         } else {
1113                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1114                         attr->priority +
1115                         hash_rxq_init[parser->layer].flow_priority;
1116         }
1117 exit_free:
1118         /* Only verification is expected, all resources should be released. */
1119         if (!parser->create) {
1120                 for (i = 0; i != hash_rxq_init_n; ++i) {
1121                         if (parser->queue[i].ibv_attr) {
1122                                 rte_free(parser->queue[i].ibv_attr);
1123                                 parser->queue[i].ibv_attr = NULL;
1124                         }
1125                 }
1126         }
1127         return ret;
1128 exit_enomem:
1129         for (i = 0; i != hash_rxq_init_n; ++i) {
1130                 if (parser->queue[i].ibv_attr) {
1131                         rte_free(parser->queue[i].ibv_attr);
1132                         parser->queue[i].ibv_attr = NULL;
1133                 }
1134         }
1135         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1136                            NULL, "cannot allocate verbs spec attributes.");
1137         return ret;
1138 exit_count_error:
1139         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1140                            NULL, "cannot create counter.");
1141         return rte_errno;
1142 }
1143
1144 /**
1145  * Copy the created specification into the flow.
1146  *
1147  * @param parser
1148  *   Internal parser structure.
1149  * @param src
1150  *   Created specification.
1151  * @param size
1152  *   Size in bytes of the specification to copy.
1153  */
1154 static void
1155 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1156                       unsigned int size)
1157 {
1158         unsigned int i;
1159         void *dst;
1160
1161         for (i = 0; i != hash_rxq_init_n; ++i) {
1162                 if (!parser->queue[i].ibv_attr)
1163                         continue;
1164                 /* Specification must be the same l3 type or none. */
1165                 if (parser->layer == HASH_RXQ_ETH ||
1166                     (hash_rxq_init[parser->layer].ip_version ==
1167                      hash_rxq_init[i].ip_version) ||
1168                     (hash_rxq_init[i].ip_version == 0)) {
1169                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1170                                         parser->queue[i].offset);
1171                         memcpy(dst, src, size);
1172                         ++parser->queue[i].ibv_attr->num_of_specs;
1173                         parser->queue[i].offset += size;
1174                 }
1175         }
1176 }
1177
1178 /**
1179  * Convert Ethernet item to Verbs specification.
1180  *
1181  * @param item[in]
1182  *   Item specification.
1183  * @param default_mask[in]
1184  *   Default bit-masks to use when item->mask is not provided.
1185  * @param data[in, out]
1186  *   User structure.
1187  */
1188 static int
1189 mlx5_flow_create_eth(const struct rte_flow_item *item,
1190                      const void *default_mask,
1191                      void *data)
1192 {
1193         const struct rte_flow_item_eth *spec = item->spec;
1194         const struct rte_flow_item_eth *mask = item->mask;
1195         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1196         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1197         struct ibv_flow_spec_eth eth = {
1198                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1199                 .size = eth_size,
1200         };
1201
1202         /* Don't update layer for the inner pattern. */
1203         if (!parser->inner)
1204                 parser->layer = HASH_RXQ_ETH;
1205         if (spec) {
1206                 unsigned int i;
1207
1208                 if (!mask)
1209                         mask = default_mask;
1210                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1211                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1212                 eth.val.ether_type = spec->type;
1213                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1214                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1215                 eth.mask.ether_type = mask->type;
1216                 /* Remove unwanted bits from values. */
1217                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1218                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1219                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1220                 }
1221                 eth.val.ether_type &= eth.mask.ether_type;
1222         }
1223         mlx5_flow_create_copy(parser, &eth, eth_size);
1224         return 0;
1225 }
1226
1227 /**
1228  * Convert VLAN item to Verbs specification.
1229  *
1230  * @param item[in]
1231  *   Item specification.
1232  * @param default_mask[in]
1233  *   Default bit-masks to use when item->mask is not provided.
1234  * @param data[in, out]
1235  *   User structure.
1236  */
1237 static int
1238 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1239                       const void *default_mask,
1240                       void *data)
1241 {
1242         const struct rte_flow_item_vlan *spec = item->spec;
1243         const struct rte_flow_item_vlan *mask = item->mask;
1244         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1245         struct ibv_flow_spec_eth *eth;
1246         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1247
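        /*
         * A VLAN item does not add a specification of its own: the TCI is
         * merged into the Ethernet specification written just before it,
         * which is why its .dst_sz is 0 in mlx5_flow_items[].
         */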
1248         if (spec) {
1249                 unsigned int i;
1250                 if (!mask)
1251                         mask = default_mask;
1252
1253                 for (i = 0; i != hash_rxq_init_n; ++i) {
1254                         if (!parser->queue[i].ibv_attr)
1255                                 continue;
1256
1257                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1258                                        parser->queue[i].offset - eth_size);
1259                         eth->val.vlan_tag = spec->tci;
1260                         eth->mask.vlan_tag = mask->tci;
1261                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1262                 }
1263         }
1264         return 0;
1265 }
1266
1267 /**
1268  * Convert IPv4 item to Verbs specification.
1269  *
1270  * @param item[in]
1271  *   Item specification.
1272  * @param default_mask[in]
1273  *   Default bit-masks to use when item->mask is not provided.
1274  * @param data[in, out]
1275  *   User structure.
1276  */
1277 static int
1278 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1279                       const void *default_mask,
1280                       void *data)
1281 {
1282         const struct rte_flow_item_ipv4 *spec = item->spec;
1283         const struct rte_flow_item_ipv4 *mask = item->mask;
1284         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1285         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1286         struct ibv_flow_spec_ipv4_ext ipv4 = {
1287                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1288                 .size = ipv4_size,
1289         };
1290
1291         /* Don't update layer for the inner pattern. */
1292         if (!parser->inner)
1293                 parser->layer = HASH_RXQ_IPV4;
1294         if (spec) {
1295                 if (!mask)
1296                         mask = default_mask;
1297                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1298                         .src_ip = spec->hdr.src_addr,
1299                         .dst_ip = spec->hdr.dst_addr,
1300                         .proto = spec->hdr.next_proto_id,
1301                         .tos = spec->hdr.type_of_service,
1302                 };
1303                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1304                         .src_ip = mask->hdr.src_addr,
1305                         .dst_ip = mask->hdr.dst_addr,
1306                         .proto = mask->hdr.next_proto_id,
1307                         .tos = mask->hdr.type_of_service,
1308                 };
1309                 /* Remove unwanted bits from values. */
1310                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1311                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1312                 ipv4.val.proto &= ipv4.mask.proto;
1313                 ipv4.val.tos &= ipv4.mask.tos;
1314         }
1315         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1316         return 0;
1317 }
1318
1319 /**
1320  * Convert IPv6 item to Verbs specification.
1321  *
1322  * @param[in] item
1323  *   Item specification.
1324  * @param[in] default_mask
1325  *   Default bit-masks to use when item->mask is not provided.
1326  * @param[in, out] data
1327  *   User structure.
1328  */
1329 static int
1330 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1331                       const void *default_mask,
1332                       void *data)
1333 {
1334         const struct rte_flow_item_ipv6 *spec = item->spec;
1335         const struct rte_flow_item_ipv6 *mask = item->mask;
1336         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1337         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1338         struct ibv_flow_spec_ipv6 ipv6 = {
1339                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1340                 .size = ipv6_size,
1341         };
1342
1343         /* Don't update layer for the inner pattern. */
1344         if (!parser->inner)
1345                 parser->layer = HASH_RXQ_IPV6;
1346         if (spec) {
1347                 unsigned int i;
1348                 uint32_t vtc_flow_val;
1349                 uint32_t vtc_flow_mask;
1350
1351                 if (!mask)
1352                         mask = default_mask;
1353                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1354                        RTE_DIM(ipv6.val.src_ip));
1355                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1356                        RTE_DIM(ipv6.val.dst_ip));
1357                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1358                        RTE_DIM(ipv6.mask.src_ip));
1359                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1360                        RTE_DIM(ipv6.mask.dst_ip));
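                     /*
                      * hdr.vtc_flow packs the IP version, traffic class and
                      * flow label into one 32-bit big-endian field; split it
                      * into the separate Verbs fields below.
                      */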
1361                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1362                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1363                 ipv6.val.flow_label =
1364                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1365                                          IPV6_HDR_FL_SHIFT);
1366                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1367                                          IPV6_HDR_TC_SHIFT;
1368                 ipv6.val.next_hdr = spec->hdr.proto;
1369                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1370                 ipv6.mask.flow_label =
1371                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1372                                          IPV6_HDR_FL_SHIFT);
1373                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1374                                           IPV6_HDR_TC_SHIFT;
1375                 ipv6.mask.next_hdr = mask->hdr.proto;
1376                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1377                 /* Remove unwanted bits from values. */
1378                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1379                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1380                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1381                 }
1382                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1383                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1384                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1385                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1386         }
1387         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1388         return 0;
1389 }
1390
1391 /**
1392  * Convert UDP item to Verbs specification.
1393  *
1394  * @param[in] item
1395  *   Item specification.
1396  * @param[in] default_mask
1397  *   Default bit-masks to use when item->mask is not provided.
1398  * @param[in, out] data
1399  *   User structure.
1400  */
1401 static int
1402 mlx5_flow_create_udp(const struct rte_flow_item *item,
1403                      const void *default_mask,
1404                      void *data)
1405 {
1406         const struct rte_flow_item_udp *spec = item->spec;
1407         const struct rte_flow_item_udp *mask = item->mask;
1408         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1409         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1410         struct ibv_flow_spec_tcp_udp udp = {
1411                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1412                 .size = udp_size,
1413         };
1414
1415         /* Don't update layer for the inner pattern. */
1416         if (!parser->inner) {
1417                 if (parser->layer == HASH_RXQ_IPV4)
1418                         parser->layer = HASH_RXQ_UDPV4;
1419                 else
1420                         parser->layer = HASH_RXQ_UDPV6;
1421         }
1422         if (spec) {
1423                 if (!mask)
1424                         mask = default_mask;
1425                 udp.val.dst_port = spec->hdr.dst_port;
1426                 udp.val.src_port = spec->hdr.src_port;
1427                 udp.mask.dst_port = mask->hdr.dst_port;
1428                 udp.mask.src_port = mask->hdr.src_port;
1429                 /* Remove unwanted bits from values. */
1430                 udp.val.src_port &= udp.mask.src_port;
1431                 udp.val.dst_port &= udp.mask.dst_port;
1432         }
1433         mlx5_flow_create_copy(parser, &udp, udp_size);
1434         return 0;
1435 }
1436
1437 /**
1438  * Convert TCP item to Verbs specification.
1439  *
1440  * @param[in] item
1441  *   Item specification.
1442  * @param[in] default_mask
1443  *   Default bit-masks to use when item->mask is not provided.
1444  * @param[in, out] data
1445  *   User structure.
1446  */
1447 static int
1448 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1449                      const void *default_mask,
1450                      void *data)
1451 {
1452         const struct rte_flow_item_tcp *spec = item->spec;
1453         const struct rte_flow_item_tcp *mask = item->mask;
1454         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1455         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1456         struct ibv_flow_spec_tcp_udp tcp = {
1457                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1458                 .size = tcp_size,
1459         };
1460
1461         /* Don't update layer for the inner pattern. */
1462         if (!parser->inner) {
1463                 if (parser->layer == HASH_RXQ_IPV4)
1464                         parser->layer = HASH_RXQ_TCPV4;
1465                 else
1466                         parser->layer = HASH_RXQ_TCPV6;
1467         }
1468         if (spec) {
1469                 if (!mask)
1470                         mask = default_mask;
1471                 tcp.val.dst_port = spec->hdr.dst_port;
1472                 tcp.val.src_port = spec->hdr.src_port;
1473                 tcp.mask.dst_port = mask->hdr.dst_port;
1474                 tcp.mask.src_port = mask->hdr.src_port;
1475                 /* Remove unwanted bits from values. */
1476                 tcp.val.src_port &= tcp.mask.src_port;
1477                 tcp.val.dst_port &= tcp.mask.dst_port;
1478         }
1479         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1480         return 0;
1481 }
1482
1483 /**
1484  * Convert VXLAN item to Verbs specification.
1485  *
1486  * @param[in] item
1487  *   Item specification.
1488  * @param[in] default_mask
1489  *   Default bit-masks to use when item->mask is not provided.
1490  * @param[in, out] data
1491  *   User structure.
1492  */
1493 static int
1494 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1495                        const void *default_mask,
1496                        void *data)
1497 {
1498         const struct rte_flow_item_vxlan *spec = item->spec;
1499         const struct rte_flow_item_vxlan *mask = item->mask;
1500         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1501         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1502         struct ibv_flow_spec_tunnel vxlan = {
1503                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1504                 .size = size,
1505         };
1506         union vni {
1507                 uint32_t vlan_id;
1508                 uint8_t vni[4];
1509         } id;
1510
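             /*
              * Build the 32-bit tunnel id from the 24-bit VNI: byte 0 is
              * kept zero while the three VNI bytes are copied into bytes
              * 1-3 of the union.
              */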
1511         id.vni[0] = 0;
1512         parser->inner = IBV_FLOW_SPEC_INNER;
1513         if (spec) {
1514                 if (!mask)
1515                         mask = default_mask;
1516                 memcpy(&id.vni[1], spec->vni, 3);
1517                 vxlan.val.tunnel_id = id.vlan_id;
1518                 memcpy(&id.vni[1], mask->vni, 3);
1519                 vxlan.mask.tunnel_id = id.vlan_id;
1520                 /* Remove unwanted bits from values. */
1521                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1522         }
1523         /*
1524          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if it is
1525          * the only layer in the Verbs specification, it acts as a wildcard
1526          * and every packet matches the rule; if it follows a full stack of
1527          * layers (e.g. eth / ipv4 / udp), every packet matching those
1528          * layers also matches the rule.
1529          * To avoid such a situation, VNI 0 is currently refused.
1530          */
1531         if (!vxlan.val.tunnel_id)
1532                 return EINVAL;
1533         mlx5_flow_create_copy(parser, &vxlan, size);
1534         return 0;
1535 }
1536
1537 /**
1538  * Convert mark/flag action to Verbs specification.
1539  *
1540  * @param parser
1541  *   Internal parser structure.
1542  * @param mark_id
1543  *   Mark identifier.
1544  */
1545 static int
1546 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1547 {
1548         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1549         struct ibv_flow_spec_action_tag tag = {
1550                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1551                 .size = size,
1552                 .tag_id = mlx5_flow_mark_set(mark_id),
1553         };
1554
1555         assert(parser->mark);
1556         mlx5_flow_create_copy(parser, &tag, size);
1557         return 0;
1558 }
1559
1560 /**
1561  * Convert count action to Verbs specification.
1562  *
1563  * @param dev
1564  *   Pointer to Ethernet device.
1565  * @param parser
1566  *   Pointer to MLX5 flow parser structure.
1567  *
1568  * @return
1569  *   0 on success, errno value on failure.
1570  */
1571 static int
1572 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1573                        struct mlx5_flow_parse *parser __rte_unused)
1574 {
1575 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1576         struct priv *priv = dev->data->dev_private;
1577         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1578         struct ibv_counter_set_init_attr init_attr = {0};
1579         struct ibv_flow_spec_counter_action counter = {
1580                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1581                 .size = size,
1582                 .counter_set_handle = 0,
1583         };
1584
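             /*
              * Allocate a dedicated counter set for this flow; its handle
              * is embedded in the Verbs counter spec copied below.
              */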
1585         init_attr.counter_set_id = 0;
1586         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1587         if (!parser->cs)
1588                 return EINVAL;
1589         counter.counter_set_handle = parser->cs->handle;
1590         mlx5_flow_create_copy(parser, &counter, size);
1591 #endif
1592         return 0;
1593 }
1594
1595 /**
1596  * Complete flow rule creation with a drop queue.
1597  *
1598  * @param dev
1599  *   Pointer to Ethernet device.
1600  * @param parser
1601  *   Internal parser structure.
1602  * @param flow
1603  *   Pointer to the rte_flow.
1604  * @param[out] error
1605  *   Perform verbose error reporting if not NULL.
1606  *
1607  * @return
1608  *   0 on success, errno value on failure.
1609  */
1610 static int
1611 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1612                                    struct mlx5_flow_parse *parser,
1613                                    struct rte_flow *flow,
1614                                    struct rte_flow_error *error)
1615 {
1616         struct priv *priv = dev->data->dev_private;
1617         struct ibv_flow_spec_action_drop *drop;
1618         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1619         int err = 0;
1620
1621         assert(priv->pd);
1622         assert(priv->ctx);
1623         flow->drop = 1;
1624         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1625                         parser->queue[HASH_RXQ_ETH].offset);
1626         *drop = (struct ibv_flow_spec_action_drop){
1627                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1628                         .size = size,
1629         };
1630         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1631         parser->queue[HASH_RXQ_ETH].offset += size;
1632         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1633                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1634         if (parser->count)
1635                 flow->cs = parser->cs;
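             /*
              * While the port is not started, keep the Verbs attributes so
              * the flow can be applied later by mlx5_flow_start() instead
              * of creating it now.
              */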
1636         if (!priv->dev->data->dev_started)
1637                 return 0;
1638         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1639         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1640                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1641                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1642         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1643                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1644                                    NULL, "flow rule creation failure");
1645                 err = ENOMEM;
1646                 goto error;
1647         }
1648         return 0;
1649 error:
1650         assert(flow);
1651         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1652                 claim_zero(mlx5_glue->destroy_flow
1653                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1654                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1655         }
1656         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1657                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1658                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1659         }
1660         if (flow->cs) {
1661                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1662                 flow->cs = NULL;
1663                 parser->cs = NULL;
1664         }
1665         return err;
1666 }
1667
1668 /**
1669  * Create hash Rx queues when RSS is enabled.
1670  *
1671  * @param dev
1672  *   Pointer to Ethernet device.
1673  * @param parser
1674  *   Internal parser structure.
1675  * @param flow
1676  *   Pointer to the rte_flow.
1677  * @param[out] error
1678  *   Perform verbose error reporting if not NULL.
1679  *
1680  * @return
1681  *   0 on success, an errno value otherwise and rte_errno is set.
1682  */
1683 static int
1684 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1685                                   struct mlx5_flow_parse *parser,
1686                                   struct rte_flow *flow,
1687                                   struct rte_flow_error *error)
1688 {
1689         struct priv *priv = dev->data->dev_private;
1690         unsigned int i;
1691
1692         for (i = 0; i != hash_rxq_init_n; ++i) {
1693                 uint64_t hash_fields;
1694
1695                 if (!parser->queue[i].ibv_attr)
1696                         continue;
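                     /* Transfer ownership of the Verbs attributes to the flow. */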
1697                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1698                 parser->queue[i].ibv_attr = NULL;
1699                 hash_fields = hash_rxq_init[i].hash_fields;
1700                 if (!priv->dev->data->dev_started)
1701                         continue;
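                     /*
                      * Reuse a matching hash Rx queue if one already exists,
                      * otherwise create a new one below.
                      */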
1702                 flow->frxq[i].hrxq =
1703                         mlx5_hrxq_get(dev,
1704                                       parser->rss_conf.rss_key,
1705                                       parser->rss_conf.rss_key_len,
1706                                       hash_fields,
1707                                       parser->queues,
1708                                       parser->queues_n);
1709                 if (flow->frxq[i].hrxq)
1710                         continue;
1711                 flow->frxq[i].hrxq =
1712                         mlx5_hrxq_new(dev,
1713                                       parser->rss_conf.rss_key,
1714                                       parser->rss_conf.rss_key_len,
1715                                       hash_fields,
1716                                       parser->queues,
1717                                       parser->queues_n);
1718                 if (!flow->frxq[i].hrxq) {
1719                         rte_flow_error_set(error, ENOMEM,
1720                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1721                                            NULL, "cannot create hash rxq");
1722                         return ENOMEM;
1723                 }
1724         }
1725         return 0;
1726 }
1727
1728 /**
1729  * Complete flow rule creation.
1730  *
1731  * @param dev
1732  *   Pointer to Ethernet device.
1733  * @param parser
1734  *   Internal parser structure.
1735  * @param flow
1736  *   Pointer to the rte_flow.
1737  * @param[out] error
1738  *   Perform verbose error reporting if not NULL.
1739  *
1740  * @return
1741  *   0 on success, an errno value otherwise and rte_errno is set.
1742  */
1743 static int
1744 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1745                               struct mlx5_flow_parse *parser,
1746                               struct rte_flow *flow,
1747                               struct rte_flow_error *error)
1748 {
1749         struct priv *priv = dev->data->dev_private;
1750         int err = 0;
1751         unsigned int i;
1752         unsigned int flows_n = 0;
1753
1754         assert(priv->pd);
1755         assert(priv->ctx);
1756         assert(!parser->drop);
1757         err = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1758         if (err)
1759                 goto error;
1760         if (parser->count)
1761                 flow->cs = parser->cs;
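             /*
              * Defer Verbs flow creation to mlx5_flow_start() while the
              * port is stopped.
              */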
1762         if (!priv->dev->data->dev_started)
1763                 return 0;
1764         for (i = 0; i != hash_rxq_init_n; ++i) {
1765                 if (!flow->frxq[i].hrxq)
1766                         continue;
1767                 flow->frxq[i].ibv_flow =
1768                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1769                                                flow->frxq[i].ibv_attr);
1770                 if (!flow->frxq[i].ibv_flow) {
1771                         rte_flow_error_set(error, ENOMEM,
1772                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1773                                            NULL, "flow rule creation failure");
1774                         err = ENOMEM;
1775                         goto error;
1776                 }
1777                 ++flows_n;
1778                 DEBUG("%p type %d QP %p ibv_flow %p",
1779                       (void *)flow, i,
1780                       (void *)flow->frxq[i].hrxq,
1781                       (void *)flow->frxq[i].ibv_flow);
1782         }
1783         if (!flows_n) {
1784                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1785                                    NULL, "internal error in flow creation");
                     err = EINVAL;
1786                 goto error;
1787         }
1788         for (i = 0; i != parser->queues_n; ++i) {
1789                 struct mlx5_rxq_data *q =
1790                         (*priv->rxqs)[parser->queues[i]];
1791
1792                 q->mark |= parser->mark;
1793         }
1794         return 0;
1795 error:
1796         assert(flow);
1797         for (i = 0; i != hash_rxq_init_n; ++i) {
1798                 if (flow->frxq[i].ibv_flow) {
1799                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1800
1801                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1802                 }
1803                 if (flow->frxq[i].hrxq)
1804                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1805                 if (flow->frxq[i].ibv_attr)
1806                         rte_free(flow->frxq[i].ibv_attr);
1807         }
1808         if (flow->cs) {
1809                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1810                 flow->cs = NULL;
1811                 parser->cs = NULL;
1812         }
1813         return err;
1814 }
1815
1816 /**
1817  * Create a flow and add it to the given flow list.
1818  *
1819  * @param dev
1820  *   Pointer to Ethernet device.
1821  * @param list
1822  *   Pointer to a TAILQ flow list.
1823  * @param[in] attr
1824  *   Flow rule attributes.
1825  * @param[in] pattern
1826  *   Pattern specification (list terminated by the END pattern item).
1827  * @param[in] actions
1828  *   Associated actions (list terminated by the END action).
1829  * @param[out] error
1830  *   Perform verbose error reporting if not NULL.
1831  *
1832  * @return
1833  *   A flow on success, NULL otherwise.
1834  */
1835 static struct rte_flow *
1836 mlx5_flow_list_create(struct rte_eth_dev *dev,
1837                       struct mlx5_flows *list,
1838                       const struct rte_flow_attr *attr,
1839                       const struct rte_flow_item items[],
1840                       const struct rte_flow_action actions[],
1841                       struct rte_flow_error *error)
1842 {
1843         struct mlx5_flow_parse parser = { .create = 1, };
1844         struct rte_flow *flow = NULL;
1845         unsigned int i;
1846         int err;
1847
1848         err = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1849         if (err)
1850                 goto exit;
1851         flow = rte_calloc(__func__, 1,
1852                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1853                           0);
1854         if (!flow) {
1855                 rte_flow_error_set(error, ENOMEM,
1856                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1857                                    NULL,
1858                                    "cannot allocate flow memory");
1859                 return NULL;
1860         }
1861         /* Copy queues configuration. */
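             /*
              * The queue array lives in the same allocation, right after
              * the flow structure.
              */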
1862         flow->queues = (uint16_t (*)[])(flow + 1);
1863         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1864         flow->queues_n = parser.queues_n;
1865         flow->mark = parser.mark;
1866         /* Copy RSS configuration. */
1867         flow->rss_conf = parser.rss_conf;
1868         flow->rss_conf.rss_key = flow->rss_key;
1869         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1870         /* Finalise the flow. */
1871         if (parser.drop)
1872                 err = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1873                                                          error);
1874         else
1875                 err = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1876         if (err)
1877                 goto exit;
1878         TAILQ_INSERT_TAIL(list, flow, next);
1879         DEBUG("Flow created %p", (void *)flow);
1880         return flow;
1881 exit:
1882         ERROR("flow creation error: %s", error->message);
1883         for (i = 0; i != hash_rxq_init_n; ++i) {
1884                 if (parser.queue[i].ibv_attr)
1885                         rte_free(parser.queue[i].ibv_attr);
1886         }
1887         rte_free(flow);
1888         return NULL;
1889 }
1890
1891 /**
1892  * Validate a flow supported by the NIC.
1893  *
1894  * @see rte_flow_validate()
1895  * @see rte_flow_ops
1896  */
1897 int
1898 mlx5_flow_validate(struct rte_eth_dev *dev,
1899                    const struct rte_flow_attr *attr,
1900                    const struct rte_flow_item items[],
1901                    const struct rte_flow_action actions[],
1902                    struct rte_flow_error *error)
1903 {
1904         int ret;
1905         struct mlx5_flow_parse parser = { .create = 0, };
1906
1907         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1908         return ret;
1909 }
1910
1911 /**
1912  * Create a flow.
1913  *
1914  * @see rte_flow_create()
1915  * @see rte_flow_ops
1916  */
1917 struct rte_flow *
1918 mlx5_flow_create(struct rte_eth_dev *dev,
1919                  const struct rte_flow_attr *attr,
1920                  const struct rte_flow_item items[],
1921                  const struct rte_flow_action actions[],
1922                  struct rte_flow_error *error)
1923 {
1924         struct priv *priv = dev->data->dev_private;
1925
1926         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
1927                                      error);
1928 }
1929
1930 /**
1931  * Destroy a flow in a list.
1932  *
1933  * @param dev
1934  *   Pointer to Ethernet device.
1935  * @param list
1936  *   Pointer to a TAILQ flow list.
1937  * @param[in] flow
1938  *   Flow to destroy.
1939  */
1940 static void
1941 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1942                        struct rte_flow *flow)
1943 {
1944         struct priv *priv = dev->data->dev_private;
1945         unsigned int i;
1946
1947         if (flow->drop || !flow->mark)
1948                 goto free;
1949         for (i = 0; i != flow->queues_n; ++i) {
1950                 struct rte_flow *tmp;
1951                 int mark = 0;
1952
1953                 /*
1954                  * To remove the mark from the queue, the queue must not be
1955                  * present in any other marked flow (RSS or not).
1956                  */
1957                 TAILQ_FOREACH(tmp, list, next) {
1958                         unsigned int j;
1959                         uint16_t *tqs = NULL;
1960                         uint16_t tq_n = 0;
1961
1962                         if (!tmp->mark)
1963                                 continue;
1964                         for (j = 0; j != hash_rxq_init_n; ++j) {
1965                                 if (!tmp->frxq[j].hrxq)
1966                                         continue;
1967                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1968                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1969                         }
1970                         if (!tq_n)
1971                                 continue;
1972                         for (j = 0; (j != tq_n) && !mark; j++)
1973                                 if (tqs[j] == (*flow->queues)[i])
1974                                         mark = 1;
1975                 }
1976                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1977         }
1978 free:
1979         if (flow->drop) {
1980                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
1981                         claim_zero(mlx5_glue->destroy_flow
1982                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1983                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1984         } else {
1985                 for (i = 0; i != hash_rxq_init_n; ++i) {
1986                         struct mlx5_flow *frxq = &flow->frxq[i];
1987
1988                         if (frxq->ibv_flow)
1989                                 claim_zero(mlx5_glue->destroy_flow
1990                                            (frxq->ibv_flow));
1991                         if (frxq->hrxq)
1992                                 mlx5_hrxq_release(dev, frxq->hrxq);
1993                         if (frxq->ibv_attr)
1994                                 rte_free(frxq->ibv_attr);
1995                 }
1996         }
1997         if (flow->cs) {
1998                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1999                 flow->cs = NULL;
2000         }
2001         TAILQ_REMOVE(list, flow, next);
2002         DEBUG("Flow destroyed %p", (void *)flow);
2003         rte_free(flow);
2004 }
2005
2006 /**
2007  * Destroy all flows.
2008  *
2009  * @param dev
2010  *   Pointer to Ethernet device.
2011  * @param list
2012  *   Pointer to a TAILQ flow list.
2013  */
2014 void
2015 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2016 {
2017         while (!TAILQ_EMPTY(list)) {
2018                 struct rte_flow *flow;
2019
2020                 flow = TAILQ_FIRST(list);
2021                 mlx5_flow_list_destroy(dev, list, flow);
2022         }
2023 }
2024
2025 /**
2026  * Create drop queue.
2027  *
2028  * @param dev
2029  *   Pointer to Ethernet device.
2030  *
2031  * @return
2032  *   0 on success, -1 on failure.
2033  */
2034 int
2035 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2036 {
2037         struct priv *priv = dev->data->dev_private;
2038         struct mlx5_hrxq_drop *fdq = NULL;
2039
2040         assert(priv->pd);
2041         assert(priv->ctx);
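             /*
              * The drop queue is a minimal CQ/WQ/indirection-table/QP chain
              * that is never polled; it only provides a valid destination
              * for flow rules carrying the drop action.
              */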
2042         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2043         if (!fdq) {
2044                 WARN("cannot allocate memory for drop queue");
2045                 goto error;
2046         }
2047         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2048         if (!fdq->cq) {
2049                 WARN("cannot allocate CQ for drop queue");
2050                 goto error;
2051         }
2052         fdq->wq = mlx5_glue->create_wq
2053                 (priv->ctx,
2054                  &(struct ibv_wq_init_attr){
2055                         .wq_type = IBV_WQT_RQ,
2056                         .max_wr = 1,
2057                         .max_sge = 1,
2058                         .pd = priv->pd,
2059                         .cq = fdq->cq,
2060                  });
2061         if (!fdq->wq) {
2062                 WARN("cannot allocate WQ for drop queue");
2063                 goto error;
2064         }
2065         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2066                 (priv->ctx,
2067                  &(struct ibv_rwq_ind_table_init_attr){
2068                         .log_ind_tbl_size = 0,
2069                         .ind_tbl = &fdq->wq,
2070                         .comp_mask = 0,
2071                  });
2072         if (!fdq->ind_table) {
2073                 WARN("cannot allocate indirection table for drop queue");
2074                 goto error;
2075         }
2076         fdq->qp = mlx5_glue->create_qp_ex
2077                 (priv->ctx,
2078                  &(struct ibv_qp_init_attr_ex){
2079                         .qp_type = IBV_QPT_RAW_PACKET,
2080                         .comp_mask =
2081                                 IBV_QP_INIT_ATTR_PD |
2082                                 IBV_QP_INIT_ATTR_IND_TABLE |
2083                                 IBV_QP_INIT_ATTR_RX_HASH,
2084                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2085                                 .rx_hash_function =
2086                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2087                                 .rx_hash_key_len = rss_hash_default_key_len,
2088                                 .rx_hash_key = rss_hash_default_key,
2089                                 .rx_hash_fields_mask = 0,
2090                                 },
2091                         .rwq_ind_tbl = fdq->ind_table,
2092                         .pd = priv->pd
2093                  });
2094         if (!fdq->qp) {
2095                 WARN("cannot allocate QP for drop queue");
2096                 goto error;
2097         }
2098         priv->flow_drop_queue = fdq;
2099         return 0;
2100 error:
2101         if (fdq && fdq->qp)
2102                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2103         if (fdq && fdq->ind_table)
2104                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2105         if (fdq && fdq->wq)
2106                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2107         if (fdq && fdq->cq)
2108                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2109         if (fdq)
2110                 rte_free(fdq);
2111         priv->flow_drop_queue = NULL;
2112         return -1;
2113 }
2114
2115 /**
2116  * Delete drop queue.
2117  *
2118  * @param dev
2119  *   Pointer to Ethernet device.
2120  */
2121 void
2122 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2123 {
2124         struct priv *priv = dev->data->dev_private;
2125         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2126
2127         if (!fdq)
2128                 return;
2129         if (fdq->qp)
2130                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2131         if (fdq->ind_table)
2132                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2133         if (fdq->wq)
2134                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2135         if (fdq->cq)
2136                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2137         rte_free(fdq);
2138         priv->flow_drop_queue = NULL;
2139 }
2140
2141 /**
2142  * Remove all flows.
2143  *
2144  * @param dev
2145  *   Pointer to Ethernet device.
2146  * @param list
2147  *   Pointer to a TAILQ flow list.
2148  */
2149 void
2150 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2151 {
2152         struct priv *priv = dev->data->dev_private;
2153         struct rte_flow *flow;
2154
2155         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2156                 unsigned int i;
2157                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2158
2159                 if (flow->drop) {
2160                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2161                                 continue;
2162                         claim_zero(mlx5_glue->destroy_flow
2163                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2164                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2165                         DEBUG("Flow %p removed", (void *)flow);
2166                         /* Next flow. */
2167                         continue;
2168                 }
2169                 /* Verify the flow has not already been cleaned. */
2170                 for (i = 0; i != hash_rxq_init_n; ++i) {
2171                         if (!flow->frxq[i].ibv_flow)
2172                                 continue;
2173                         /*
2174                          * The indirection table may be needed later to
2175                          * clear the mark flag on the Rx queues.
2176                          * Grabbing it here avoids a second loop over the
2177                          * flow entries.
2178                          */
2179                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2180                         break;
2181                 }
2182                 if (i == hash_rxq_init_n)
2183                         return;
2184                 if (flow->mark) {
2185                         assert(ind_tbl);
2186                         for (i = 0; i != ind_tbl->queues_n; ++i)
2187                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2188                 }
2189                 for (i = 0; i != hash_rxq_init_n; ++i) {
2190                         if (!flow->frxq[i].ibv_flow)
2191                                 continue;
2192                         claim_zero(mlx5_glue->destroy_flow
2193                                    (flow->frxq[i].ibv_flow));
2194                         flow->frxq[i].ibv_flow = NULL;
2195                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2196                         flow->frxq[i].hrxq = NULL;
2197                 }
2198                 DEBUG("Flow %p removed", (void *)flow);
2199         }
2200 }
2201
2202 /**
2203  * Add all flows.
2204  *
2205  * @param dev
2206  *   Pointer to Ethernet device.
2207  * @param list
2208  *   Pointer to a TAILQ flow list.
2209  *
2210  * @return
2211  *   0 on success, an errno value otherwise and rte_errno is set.
2212  */
2213 int
2214 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2215 {
2216         struct priv *priv = dev->data->dev_private;
2217         struct rte_flow *flow;
2218
2219         TAILQ_FOREACH(flow, list, next) {
2220                 unsigned int i;
2221
2222                 if (flow->drop) {
2223                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2224                                 mlx5_glue->create_flow
2225                                 (priv->flow_drop_queue->qp,
2226                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2227                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2228                                 DEBUG("Flow %p cannot be applied",
2229                                       (void *)flow);
2230                                 rte_errno = EINVAL;
2231                                 return rte_errno;
2232                         }
2233                         DEBUG("Flow %p applied", (void *)flow);
2234                         /* Next flow. */
2235                         continue;
2236                 }
2237                 for (i = 0; i != hash_rxq_init_n; ++i) {
2238                         if (!flow->frxq[i].ibv_attr)
2239                                 continue;
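                             /*
                              * Re-acquire or re-create the hash Rx queues
                              * released by mlx5_flow_stop().
                              */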
2240                         flow->frxq[i].hrxq =
2241                                 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2242                                               flow->rss_conf.rss_key_len,
2243                                               hash_rxq_init[i].hash_fields,
2244                                               (*flow->queues),
2245                                               flow->queues_n);
2246                         if (flow->frxq[i].hrxq)
2247                                 goto flow_create;
2248                         flow->frxq[i].hrxq =
2249                                 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2250                                               flow->rss_conf.rss_key_len,
2251                                               hash_rxq_init[i].hash_fields,
2252                                               (*flow->queues),
2253                                               flow->queues_n);
2254                         if (!flow->frxq[i].hrxq) {
2255                                 DEBUG("Flow %p cannot be applied",
2256                                       (void *)flow);
2257                                 rte_errno = EINVAL;
2258                                 return rte_errno;
2259                         }
2260 flow_create:
2261                         flow->frxq[i].ibv_flow =
2262                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2263                                                        flow->frxq[i].ibv_attr);
2264                         if (!flow->frxq[i].ibv_flow) {
2265                                 DEBUG("Flow %p cannot be applied",
2266                                       (void *)flow);
2267                                 rte_errno = EINVAL;
2268                                 return rte_errno;
2269                         }
2270                         DEBUG("Flow %p applied", (void *)flow);
2271                 }
2272                 if (!flow->mark)
2273                         continue;
2274                 for (i = 0; i != flow->queues_n; ++i)
2275                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2276         }
2277         return 0;
2278 }
2279
2280 /**
2281  * Verify the flow list is empty.
2282  *
2283  * @param dev
2284  *   Pointer to Ethernet device.
2285  *
2286  * @return The number of flows not released.
2287  */
2288 int
2289 mlx5_flow_verify(struct rte_eth_dev *dev)
2290 {
2291         struct priv *priv = dev->data->dev_private;
2292         struct rte_flow *flow;
2293         int ret = 0;
2294
2295         TAILQ_FOREACH(flow, &priv->flows, next) {
2296                 DEBUG("%p: flow %p still referenced", (void *)dev,
2297                       (void *)flow);
2298                 ++ret;
2299         }
2300         return ret;
2301 }
2302
2303 /**
2304  * Enable a control flow configured from the control plane.
2305  *
2306  * @param dev
2307  *   Pointer to Ethernet device.
2308  * @param eth_spec
2309  *   An Ethernet flow spec to apply.
2310  * @param eth_mask
2311  *   An Ethernet flow mask to apply.
2312  * @param vlan_spec
2313  *   A VLAN flow spec to apply.
2314  * @param vlan_mask
2315  *   A VLAN flow mask to apply.
2316  *
2317  * @return
2318  *   0 on success.
2319  *   0 on success, an errno value otherwise.
2320 int
2321 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2322                     struct rte_flow_item_eth *eth_spec,
2323                     struct rte_flow_item_eth *eth_mask,
2324                     struct rte_flow_item_vlan *vlan_spec,
2325                     struct rte_flow_item_vlan *vlan_mask)
2326 {
2327         struct priv *priv = dev->data->dev_private;
2328         const struct rte_flow_attr attr = {
2329                 .ingress = 1,
2330                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2331         };
2332         struct rte_flow_item items[] = {
2333                 {
2334                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2335                         .spec = eth_spec,
2336                         .last = NULL,
2337                         .mask = eth_mask,
2338                 },
2339                 {
2340                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2341                                 RTE_FLOW_ITEM_TYPE_END,
2342                         .spec = vlan_spec,
2343                         .last = NULL,
2344                         .mask = vlan_mask,
2345                 },
2346                 {
2347                         .type = RTE_FLOW_ITEM_TYPE_END,
2348                 },
2349         };
2350         struct rte_flow_action actions[] = {
2351                 {
2352                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2353                 },
2354                 {
2355                         .type = RTE_FLOW_ACTION_TYPE_END,
2356                 },
2357         };
2358         struct rte_flow *flow;
2359         struct rte_flow_error error;
2360         unsigned int i;
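             /*
              * struct rte_flow_action_rss ends in a flexible queue[] array;
              * the local mirror below reserves room for the maximum number
              * of queues on the stack.
              */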
2361         union {
2362                 struct rte_flow_action_rss rss;
2363                 struct {
2364                         const struct rte_eth_rss_conf *rss_conf;
2365                         uint16_t num;
2366                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2367                 } local;
2368         } action_rss;
2369
2370         if (!priv->reta_idx_n)
2371                 return EINVAL;
2372         for (i = 0; i != priv->reta_idx_n; ++i)
2373                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2374         action_rss.local.rss_conf = &priv->rss_conf;
2375         action_rss.local.num = priv->reta_idx_n;
2376         actions[0].conf = (const void *)&action_rss.rss;
2377         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2378                                      actions, &error);
2379         if (!flow)
2380                 return rte_errno;
2381         return 0;
2382 }
2383
2384 /**
2385  * Enable a control flow configured from the control plane.
2386  *
2387  * @param dev
2388  *   Pointer to Ethernet device.
2389  * @param eth_spec
2390  *   An Ethernet flow spec to apply.
2391  * @param eth_mask
2392  *   An Ethernet flow mask to apply.
2393  *
2394  * @return
2395  *   0 on success, an errno value otherwise.
2396  */
2397 int
2398 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2399                struct rte_flow_item_eth *eth_spec,
2400                struct rte_flow_item_eth *eth_mask)
2401 {
2402         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2403 }
2404
2405 /**
2406  * Destroy a flow.
2407  *
2408  * @see rte_flow_destroy()
2409  * @see rte_flow_ops
2410  */
2411 int
2412 mlx5_flow_destroy(struct rte_eth_dev *dev,
2413                   struct rte_flow *flow,
2414                   struct rte_flow_error *error __rte_unused)
2415 {
2416         struct priv *priv = dev->data->dev_private;
2417
2418         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2419         return 0;
2420 }
2421
2422 /**
2423  * Destroy all flows.
2424  *
2425  * @see rte_flow_flush()
2426  * @see rte_flow_ops
2427  */
2428 int
2429 mlx5_flow_flush(struct rte_eth_dev *dev,
2430                 struct rte_flow_error *error __rte_unused)
2431 {
2432         struct priv *priv = dev->data->dev_private;
2433
2434         mlx5_flow_list_flush(dev, &priv->flows);
2435         return 0;
2436 }
2437
2438 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2439 /**
2440  * Query flow counter.
2441  *
2442  * @param cs
2443  *   The counter set to query.
2444  * @param counter_stats
2445  *   Accumulated statistics from previous queries, updated on reset.
2446  *
2447  * @return
2448  *   0 on success, an errno value otherwise.
2449  */
2450 static int
2451 mlx5_flow_query_count(struct ibv_counter_set *cs,
2452                       struct mlx5_flow_counter_stats *counter_stats,
2453                       struct rte_flow_query_count *query_count,
2454                       struct rte_flow_error *error)
2455 {
2456         uint64_t counters[2];
2457         struct ibv_query_counter_set_attr query_cs_attr = {
2458                 .cs = cs,
2459                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2460         };
2461         struct ibv_counter_set_data query_out = {
2462                 .out = counters,
2463                 .outlen = 2 * sizeof(uint64_t),
2464         };
2465         int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2466
2467         if (res) {
2468                 rte_flow_error_set(error, -res,
2469                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2470                                    NULL,
2471                                    "cannot read counter");
2472                 return -res;
2473         }
2474         query_count->hits_set = 1;
2475         query_count->bytes_set = 1;
2476         query_count->hits = counters[0] - counter_stats->hits;
2477         query_count->bytes = counters[1] - counter_stats->bytes;
2478         if (query_count->reset) {
2479                 counter_stats->hits = counters[0];
2480                 counter_stats->bytes = counters[1];
2481         }
2482         return 0;
2483 }
2484
2485 /**
2486  * Query a flow.
2487  *
2488  * @see rte_flow_query()
2489  * @see rte_flow_ops
2490  */
2491 int
2492 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2493                 struct rte_flow *flow,
2494                 enum rte_flow_action_type action __rte_unused,
2495                 void *data,
2496                 struct rte_flow_error *error)
2497 {
2498         int res = EINVAL;
2499
2500         if (flow->cs) {
2501                 res = mlx5_flow_query_count(flow->cs,
2502                                         &flow->counter_stats,
2503                                         (struct rte_flow_query_count *)data,
2504                                         error);
2505         } else {
2506                 rte_flow_error_set(error, res,
2507                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2508                                    NULL,
2509                                    "no counter found for flow");
2510         }
2511         return -res;
2512 }
2513 #endif
2514
2515 /**
2516  * Enable or disable flow isolation mode.
2517  *
2518  * @see rte_flow_isolate()
2519  * @see rte_flow_ops
2520  */
2521 int
2522 mlx5_flow_isolate(struct rte_eth_dev *dev,
2523                   int enable,
2524                   struct rte_flow_error *error)
2525 {
2526         struct priv *priv = dev->data->dev_private;
2527
2528         if (dev->data->dev_started) {
2529                 rte_flow_error_set(error, EBUSY,
2530                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2531                                    NULL,
2532                                    "port must be stopped first");
2533                 return -rte_errno;
2534         }
2535         priv->isolated = !!enable;
2536         if (enable)
2537                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2538         else
2539                 priv->dev->dev_ops = &mlx5_dev_ops;
2540         return 0;
2541 }
2542
2543 /**
2544  * Convert a flow director filter to a generic flow.
2545  *
2546  * @param dev
2547  *   Pointer to Ethernet device.
2548  * @param fdir_filter
2549  *   Flow director filter to add.
2550  * @param attributes
2551  *   Generic flow parameters structure.
2552  *
2553  * @return
2554  *  0 on success, errno value on error.
2555  */
2556 static int
2557 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2558                          const struct rte_eth_fdir_filter *fdir_filter,
2559                          struct mlx5_fdir *attributes)
2560 {
2561         struct priv *priv = dev->data->dev_private;
2562         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2563
2564         /* Validate queue number. */
2565         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2566                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2567                 return EINVAL;
2568         }
2569         attributes->attr.ingress = 1;
2570         attributes->items[0] = (struct rte_flow_item) {
2571                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2572                 .spec = &attributes->l2,
2573                 .mask = &attributes->l2_mask,
2574         };
2575         switch (fdir_filter->action.behavior) {
2576         case RTE_ETH_FDIR_ACCEPT:
2577                 attributes->actions[0] = (struct rte_flow_action){
2578                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2579                         .conf = &attributes->queue,
2580                 };
2581                 break;
2582         case RTE_ETH_FDIR_REJECT:
2583                 attributes->actions[0] = (struct rte_flow_action){
2584                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2585                 };
2586                 break;
2587         default:
2588                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2589                 return ENOTSUP;
2590         }
2591         attributes->queue.index = fdir_filter->action.rx_queue;
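             /*
              * Translate the flow director tuple into a generic flow
              * pattern: items[0] is the Ethernet header (set above),
              * items[1] the L3 header and, for UDP/TCP flow types,
              * items[2] the L4 header. Spec and mask point to the same
              * data for the L3/L4 items.
              */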
2592         switch (fdir_filter->input.flow_type) {
2593         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2594                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2595                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2596                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2597                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2598                         .type_of_service = input->flow.udp4_flow.ip.tos,
2599                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2600                 };
2601                 attributes->l4.udp.hdr = (struct udp_hdr){
2602                         .src_port = input->flow.udp4_flow.src_port,
2603                         .dst_port = input->flow.udp4_flow.dst_port,
2604                 };
2605                 attributes->items[1] = (struct rte_flow_item){
2606                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2607                         .spec = &attributes->l3,
2608                         .mask = &attributes->l3,
2609                 };
2610                 attributes->items[2] = (struct rte_flow_item){
2611                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2612                         .spec = &attributes->l4,
2613                         .mask = &attributes->l4,
2614                 };
2615                 break;
2616         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2617                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2618                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2619                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2620                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2621                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2622                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2623                 };
2624                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2625                         .src_port = input->flow.tcp4_flow.src_port,
2626                         .dst_port = input->flow.tcp4_flow.dst_port,
2627                 };
2628                 attributes->items[1] = (struct rte_flow_item){
2629                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2630                         .spec = &attributes->l3,
2631                         .mask = &attributes->l3,
2632                 };
2633                 attributes->items[2] = (struct rte_flow_item){
2634                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2635                         .spec = &attributes->l4,
2636                         .mask = &attributes->l4,
2637                 };
2638                 break;
2639         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2640                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2641                         .src_addr = input->flow.ip4_flow.src_ip,
2642                         .dst_addr = input->flow.ip4_flow.dst_ip,
2643                         .time_to_live = input->flow.ip4_flow.ttl,
2644                         .type_of_service = input->flow.ip4_flow.tos,
2645                         .next_proto_id = input->flow.ip4_flow.proto,
2646                 };
2647                 attributes->items[1] = (struct rte_flow_item){
2648                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2649                         .spec = &attributes->l3,
2650                         .mask = &attributes->l3,
2651                 };
2652                 break;
2653         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2654                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2655                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2656                         .proto = input->flow.udp6_flow.ip.proto,
2657                 };
2658                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2659                        input->flow.udp6_flow.ip.src_ip,
2660                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2661                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2662                        input->flow.udp6_flow.ip.dst_ip,
2663                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2664                 attributes->l4.udp.hdr = (struct udp_hdr){
2665                         .src_port = input->flow.udp6_flow.src_port,
2666                         .dst_port = input->flow.udp6_flow.dst_port,
2667                 };
2668                 attributes->items[1] = (struct rte_flow_item){
2669                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2670                         .spec = &attributes->l3,
2671                         .mask = &attributes->l3,
2672                 };
2673                 attributes->items[2] = (struct rte_flow_item){
2674                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2675                         .spec = &attributes->l4,
2676                         .mask = &attributes->l4,
2677                 };
2678                 break;
2679         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2680                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2681                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2682                         .proto = input->flow.tcp6_flow.ip.proto,
2683                 };
2684                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2685                        input->flow.tcp6_flow.ip.src_ip,
2686                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2687                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2688                        input->flow.tcp6_flow.ip.dst_ip,
2689                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2690                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2691                         .src_port = input->flow.tcp6_flow.src_port,
2692                         .dst_port = input->flow.tcp6_flow.dst_port,
2693                 };
2694                 attributes->items[1] = (struct rte_flow_item){
2695                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2696                         .spec = &attributes->l3,
2697                         .mask = &attributes->l3,
2698                 };
2699                 attributes->items[2] = (struct rte_flow_item){
2700                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2701                         .spec = &attributes->l4,
2702                         .mask = &attributes->l4,
2703                 };
2704                 break;
2705         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2706                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2707                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2708                         .proto = input->flow.ipv6_flow.proto,
2709                 };
2710                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2711                        input->flow.ipv6_flow.src_ip,
2712                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2713                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2714                        input->flow.ipv6_flow.dst_ip,
2713                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2716                 attributes->items[1] = (struct rte_flow_item){
2717                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2718                         .spec = &attributes->l3,
2719                         .mask = &attributes->l3,
2720                 };
2721                 break;
2722         default:
2723         ERROR("invalid flow type %d",
2724                       fdir_filter->input.flow_type);
2725                 return ENOTSUP;
2726         }
2727         return 0;
2728 }
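
/*
 * Note on the conversion above: attributes->items[0] is expected to hold the
 * Ethernet pattern set up earlier in this function, [1] receives the L3
 * (IPv4/IPv6) pattern and, for TCP/UDP flow types, [2] receives the L4
 * pattern.  Remaining entries stay zeroed, which rte_flow reads as
 * RTE_FLOW_ITEM_TYPE_END since that enumerator has the value 0.
 */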
2729
2730 /**
2731  * Add a new flow director filter and store it in the flow list.
2732  *
2733  * @param dev
2734  *   Pointer to Ethernet device.
2735  * @param fdir_filter
2736  *   Flow director filter to add.
2737  *
2738  * @return
2739  *   0 on success, errno value on failure.
2740  */
2741 static int
2742 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2743                      const struct rte_eth_fdir_filter *fdir_filter)
2744 {
2745         struct priv *priv = dev->data->dev_private;
2746         struct mlx5_fdir attributes = {
2747                 .attr.group = 0,
2748                 .l2_mask = {
2749                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2750                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2751                         .type = 0,
2752                 },
2753         };
2754         struct mlx5_flow_parse parser = {
2755                 .layer = HASH_RXQ_ETH,
2756         };
2757         struct rte_flow_error error;
2758         struct rte_flow *flow;
2759         int ret;
2760
2761         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2762         if (ret)
2763                 return ret;
2764         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2765                                 attributes.actions, &error, &parser);
2766         if (ret)
2767                 return -ret;
2768         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2769                                      attributes.items, attributes.actions,
2770                                      &error);
2771         if (flow) {
2772                 DEBUG("FDIR created %p", (void *)flow);
2773                 return 0;
2774         }
2775         return ENOTSUP;
2776 }
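
/*
 * Illustrative sketch (not compiled into the driver): an application reaches
 * mlx5_fdir_filter_add() through the legacy filter control API, roughly as
 * below.  The port number, queue index, addresses and ports are made-up
 * values.
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(0x0a000001),
 *				.ip.dst_ip = rte_cpu_to_be_32(0x0a000002),
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 1,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *	int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *					  RTE_ETH_FILTER_ADD, &filter);
 */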
2777
2778 /**
2779  * Delete a specific flow director filter.
2780  *
2781  * @param dev
2782  *   Pointer to Ethernet device.
2783  * @param fdir_filter
2784  *   Filter to be deleted.
2785  *
2786  * @return
2787  *   0 on success, errno value on failure.
2788  */
2789 static int
2790 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2791                         const struct rte_eth_fdir_filter *fdir_filter)
2792 {
2793         struct priv *priv = dev->data->dev_private;
2794         struct mlx5_fdir attributes = {
2795                 .attr.group = 0,
2796         };
2797         struct mlx5_flow_parse parser = {
2798                 .create = 1,
2799                 .layer = HASH_RXQ_ETH,
2800         };
2801         struct rte_flow_error error;
2802         struct rte_flow *flow;
2803         unsigned int i;
2804         int ret;
2805
2806         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2807         if (ret)
2808                 return ret;
2809         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2810                                 attributes.actions, &error, &parser);
2811         if (ret)
2812                 goto exit;
2813         /*
2814          * Special case for the drop action: it is only added to the
2815          * specifications when a flow is actually created, so it is missing
2816          * here and must be appended before comparing against stored flows.
2817          */
2818         if (parser.drop) {
2819                 struct ibv_flow_spec_action_drop *drop;
2820
2821                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2822                                 parser.queue[HASH_RXQ_ETH].offset);
2823                 *drop = (struct ibv_flow_spec_action_drop){
2824                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2825                         .size = sizeof(struct ibv_flow_spec_action_drop),
2826                 };
2827                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2828         }
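        /*
         * Scan the flow list for a rule whose verbs attributes and
         * specifications are byte-identical to the ones just rebuilt; flow
         * director rules are not tracked separately, so deletion works by
         * comparison against every stored flow.
         */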
2829         TAILQ_FOREACH(flow, &priv->flows, next) {
2830                 struct ibv_flow_attr *attr;
2831                 struct ibv_spec_header *attr_h;
2832                 void *spec;
2833                 struct ibv_flow_attr *flow_attr;
2834                 struct ibv_spec_header *flow_h;
2835                 void *flow_spec;
2836                 unsigned int specs_n;
2837
2838                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2839                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2840                 /* Compare first the attributes. */
2841                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2842                         continue;
2843                 if (attr->num_of_specs == 0)
2844                         continue;
2845                 spec = (void *)((uintptr_t)attr +
2846                                 sizeof(struct ibv_flow_attr));
2847                 flow_spec = (void *)((uintptr_t)flow_attr +
2848                                      sizeof(struct ibv_flow_attr));
2849                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2850                 for (i = 0; i != specs_n; ++i) {
2851                         attr_h = spec;
2852                         flow_h = flow_spec;
2853                         if (memcmp(spec, flow_spec,
2854                                    RTE_MIN(attr_h->size, flow_h->size)))
2855                                 goto wrong_flow;
2856                         spec = (void *)((uintptr_t)spec + attr_h->size);
2857                         flow_spec = (void *)((uintptr_t)flow_spec +
2858                                              flow_h->size);
2859                 }
2860                 /* At this point, the flow matches. */
2861                 break;
2862 wrong_flow:
2863                 /* The flow does not match. */
2864                 continue;
2865         }
2866         if (flow)
2867                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2868 exit:
2869         for (i = 0; i != hash_rxq_init_n; ++i) {
2870                 if (parser.queue[i].ibv_attr)
2871                         rte_free(parser.queue[i].ibv_attr);
2872         }
2873         return -ret;
2874 }
2875
2876 /**
2877  * Update a specific flow director filter by deleting and re-adding it.
2878  *
2879  * @param dev
2880  *   Pointer to Ethernet device.
2881  * @param fdir_filter
2882  *   Filter to be updated.
2883  *
2884  * @return
2885  *   0 on success, errno value on failure.
2886  */
2887 static int
2888 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2889                         const struct rte_eth_fdir_filter *fdir_filter)
2890 {
2891         int ret;
2892
2893         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2894         if (ret)
2895                 return ret;
2896         ret = mlx5_fdir_filter_add(dev, fdir_filter);
2897         return ret;
2898 }
2899
2900 /**
2901  * Flush all filters.
2902  *
2903  * @param dev
2904  *   Pointer to Ethernet device.
2905  */
2906 static void
2907 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2908 {
2909         struct priv *priv = dev->data->dev_private;
2910
2911         mlx5_flow_list_flush(dev, &priv->flows);
2912 }
2913
2914 /**
2915  * Get flow director information.
2916  *
2917  * @param dev
2918  *   Pointer to Ethernet device.
2919  * @param[out] fdir_info
2920  *   Resulting flow director information.
2921  */
2922 static void
2923 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
2924 {
2925         struct priv *priv = dev->data->dev_private;
2926         struct rte_eth_fdir_masks *mask =
2927                 &priv->dev->data->dev_conf.fdir_conf.mask;
2928
2929         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2930         fdir_info->guarant_spc = 0;
2931         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
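        /* Flexible payload and the remaining capabilities are not supported. */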
2932         fdir_info->max_flexpayload = 0;
2933         fdir_info->flow_types_mask[0] = 0;
2934         fdir_info->flex_payload_unit = 0;
2935         fdir_info->max_flex_payload_segment_num = 0;
2936         fdir_info->flex_payload_limit = 0;
2937         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2938 }
2939
2940 /**
2941  * Deal with flow director operations.
2942  *
2943  * @param dev
2944  *   Pointer to Ethernet device.
2945  * @param filter_op
2946  *   Operation to perform.
2947  * @param arg
2948  *   Pointer to operation-specific structure.
2949  *
2950  * @return
2951  *   0 on success, errno value on failure.
2952  */
2953 static int
2954 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
2955                     void *arg)
2956 {
2957         struct priv *priv = dev->data->dev_private;
2958         enum rte_fdir_mode fdir_mode =
2959                 priv->dev->data->dev_conf.fdir_conf.mode;
2960         int ret = 0;
2961
2962         if (filter_op == RTE_ETH_FILTER_NOP)
2963                 return 0;
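        /*
         * Flow director is emulated on top of rte_flow, which can only
         * express perfect match rules; other modes (e.g. signature) are
         * rejected.
         */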
2964         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2965             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2966                 ERROR("%p: flow director mode %d not supported",
2967                       (void *)dev, fdir_mode);
2968                 return EINVAL;
2969         }
2970         switch (filter_op) {
2971         case RTE_ETH_FILTER_ADD:
2972                 ret = mlx5_fdir_filter_add(dev, arg);
2973                 break;
2974         case RTE_ETH_FILTER_UPDATE:
2975                 ret = mlx5_fdir_filter_update(dev, arg);
2976                 break;
2977         case RTE_ETH_FILTER_DELETE:
2978                 ret = mlx5_fdir_filter_delete(dev, arg);
2979                 break;
2980         case RTE_ETH_FILTER_FLUSH:
2981                 mlx5_fdir_filter_flush(dev);
2982                 break;
2983         case RTE_ETH_FILTER_INFO:
2984                 mlx5_fdir_info_get(dev, arg);
2985                 break;
2986         default:
2987                 DEBUG("%p: unknown operation %u", (void *)dev,
2988                       filter_op);
2989                 ret = EINVAL;
2990                 break;
2991         }
2992         return ret;
2993 }
2994
2995 /**
2996  * Manage filter operations.
2997  *
2998  * @param dev
2999  *   Pointer to Ethernet device structure.
3000  * @param filter_type
3001  *   Filter type.
3002  * @param filter_op
3003  *   Operation to perform.
3004  * @param arg
3005  *   Pointer to operation-specific structure.
3006  *
3007  * @return
3008  *   0 on success, negative errno value on failure.
3009  */
3010 int
3011 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3012                      enum rte_filter_type filter_type,
3013                      enum rte_filter_op filter_op,
3014                      void *arg)
3015 {
3016         int ret = EINVAL;
3017
3018         switch (filter_type) {
3019         case RTE_ETH_FILTER_GENERIC:
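                /*
                 * The generic filter type is how the rte_flow API retrieves
                 * the PMD flow operations; only the GET operation is
                 * meaningful here.
                 */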
3020                 if (filter_op != RTE_ETH_FILTER_GET)
3021                         return -EINVAL;
3022                 *(const void **)arg = &mlx5_flow_ops;
3023                 return 0;
3024         case RTE_ETH_FILTER_FDIR:
3025                 ret = mlx5_fdir_ctrl_func(dev, filter_op, arg);
3026                 break;
3027         default:
3028                 ERROR("%p: filter type (%d) not supported",
3029                       (void *)dev, filter_type);
3030                 break;
3031         }
3032         return -ret;
3033 }