net/mlx5: support 16 hardware priorities
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40
41 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
42 struct ibv_flow_spec_counter_action {
43         int dummy;
44 };
45 #endif
46
47 /* Dev ops structure defined in mlx5.c */
48 extern const struct eth_dev_ops mlx5_dev_ops;
49 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
50
51 /** Structure given to the conversion functions. */
52 struct mlx5_flow_data {
53         struct mlx5_flow_parse *parser; /** Parser context. */
54         struct rte_flow_error *error; /** Error context. */
55 };
56
57 static int
58 mlx5_flow_create_eth(const struct rte_flow_item *item,
59                      const void *default_mask,
60                      struct mlx5_flow_data *data);
61
62 static int
63 mlx5_flow_create_vlan(const struct rte_flow_item *item,
64                       const void *default_mask,
65                       struct mlx5_flow_data *data);
66
67 static int
68 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
69                       const void *default_mask,
70                       struct mlx5_flow_data *data);
71
72 static int
73 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
74                       const void *default_mask,
75                       struct mlx5_flow_data *data);
76
77 static int
78 mlx5_flow_create_udp(const struct rte_flow_item *item,
79                      const void *default_mask,
80                      struct mlx5_flow_data *data);
81
82 static int
83 mlx5_flow_create_tcp(const struct rte_flow_item *item,
84                      const void *default_mask,
85                      struct mlx5_flow_data *data);
86
87 static int
88 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
89                        const void *default_mask,
90                        struct mlx5_flow_data *data);
91
92 struct mlx5_flow_parse;
93
94 static void
95 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
96                       unsigned int size);
97
98 static int
99 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
100
101 static int
102 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
103
104 /* Hash RX queue types. */
105 enum hash_rxq_type {
106         HASH_RXQ_TCPV4,
107         HASH_RXQ_UDPV4,
108         HASH_RXQ_IPV4,
109         HASH_RXQ_TCPV6,
110         HASH_RXQ_UDPV6,
111         HASH_RXQ_IPV6,
112         HASH_RXQ_ETH,
113 };
114
115 /* Initialization data for hash RX queue. */
116 struct hash_rxq_init {
117         uint64_t hash_fields; /* Fields that participate in the hash. */
118         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
119         unsigned int flow_priority; /* Flow priority to use. */
120         unsigned int ip_version; /* Internet protocol. */
121 };
122
123 /* Initialization data for hash RX queues. */
124 const struct hash_rxq_init hash_rxq_init[] = {
125         [HASH_RXQ_TCPV4] = {
126                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
127                                 IBV_RX_HASH_DST_IPV4 |
128                                 IBV_RX_HASH_SRC_PORT_TCP |
129                                 IBV_RX_HASH_DST_PORT_TCP),
130                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
131                 .flow_priority = 0,
132                 .ip_version = MLX5_IPV4,
133         },
134         [HASH_RXQ_UDPV4] = {
135                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
136                                 IBV_RX_HASH_DST_IPV4 |
137                                 IBV_RX_HASH_SRC_PORT_UDP |
138                                 IBV_RX_HASH_DST_PORT_UDP),
139                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
140                 .flow_priority = 0,
141                 .ip_version = MLX5_IPV4,
142         },
143         [HASH_RXQ_IPV4] = {
144                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145                                 IBV_RX_HASH_DST_IPV4),
146                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
147                                 ETH_RSS_FRAG_IPV4),
148                 .flow_priority = 1,
149                 .ip_version = MLX5_IPV4,
150         },
151         [HASH_RXQ_TCPV6] = {
152                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
153                                 IBV_RX_HASH_DST_IPV6 |
154                                 IBV_RX_HASH_SRC_PORT_TCP |
155                                 IBV_RX_HASH_DST_PORT_TCP),
156                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
157                 .flow_priority = 0,
158                 .ip_version = MLX5_IPV6,
159         },
160         [HASH_RXQ_UDPV6] = {
161                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
162                                 IBV_RX_HASH_DST_IPV6 |
163                                 IBV_RX_HASH_SRC_PORT_UDP |
164                                 IBV_RX_HASH_DST_PORT_UDP),
165                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
166                 .flow_priority = 0,
167                 .ip_version = MLX5_IPV6,
168         },
169         [HASH_RXQ_IPV6] = {
170                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171                                 IBV_RX_HASH_DST_IPV6),
172                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
173                                 ETH_RSS_FRAG_IPV6),
174                 .flow_priority = 1,
175                 .ip_version = MLX5_IPV6,
176         },
177         [HASH_RXQ_ETH] = {
178                 .hash_fields = 0,
179                 .dpdk_rss_hf = 0,
180                 .flow_priority = 2,
181         },
182 };
183
184 /* Number of entries in hash_rxq_init[]. */
185 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
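/*
 * Note: the .flow_priority values above (0 for L4, 1 for L3, 2 for L2) are
 * relative offsets only.  mlx5_flow_update_priority() later adds them to a
 * base priority derived from attr->priority and from the number of Verbs
 * priorities supported by the device, so more specific matches always keep a
 * numerically lower Verbs priority (a lower value takes precedence) than
 * broader ones.
 */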
186
187 /** Structure for holding counter stats. */
188 struct mlx5_flow_counter_stats {
189         uint64_t hits; /**< Number of packets matched by the rule. */
190         uint64_t bytes; /**< Number of bytes matched by the rule. */
191 };
192
193 /** Structure for Drop queue. */
194 struct mlx5_hrxq_drop {
195         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196         struct ibv_qp *qp; /**< Verbs queue pair. */
197         struct ibv_wq *wq; /**< Verbs work queue. */
198         struct ibv_cq *cq; /**< Verbs completion queue. */
199 };
200
201 /* Flows structures. */
202 struct mlx5_flow {
203         uint64_t hash_fields; /**< Fields that participate in the hash. */
204         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205         struct ibv_flow *ibv_flow; /**< Verbs flow. */
206         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 };
208
209 /* Drop flows structures. */
210 struct mlx5_flow_drop {
211         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212         struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 };
214
215 struct rte_flow {
216         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217         uint32_t mark:1; /**< Set if the flow is marked. */
218         uint32_t drop:1; /**< Drop queue. */
219         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
220         uint16_t (*queues)[]; /**< Queues indexes to use. */
221         uint8_t rss_key[40]; /**< copy of the RSS key. */
222         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
223         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
224         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225         /**< Flow with Rx queue. */
226 };
227
228 /** Static initializer for items. */
229 #define ITEMS(...) \
230         (const enum rte_flow_item_type []){ \
231                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
232         }
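/*
 * For example, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to an anonymous array { IPV4, IPV6, END }; this is how the graph
 * below lists which items are allowed to follow a given item.
 */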
233
234 /** Structure to generate a simple graph of layers supported by the NIC. */
235 struct mlx5_flow_items {
236         /** List of possible actions for these items. */
237         const enum rte_flow_action_type *const actions;
238         /** Bit-masks corresponding to the possibilities for the item. */
239         const void *mask;
240         /**
241          * Default bit-masks to use when item->mask is not provided. When
242          * \default_mask is also NULL, the full supported bit-mask (\mask) is
243          * used instead.
244          */
245         const void *default_mask;
246         /** Bit-masks size in bytes. */
247         const unsigned int mask_sz;
248         /**
249          * Conversion function from rte_flow to NIC specific flow.
250          *
251          * @param item
252          *   rte_flow item to convert.
253          * @param default_mask
254          *   Default bit-masks to use when item->mask is not provided.
255          * @param data
256          *   Internal structure to store the conversion.
257          *
258          * @return
259          *   0 on success, a negative errno value otherwise and rte_errno is
260          *   set.
261          */
262         int (*convert)(const struct rte_flow_item *item,
263                        const void *default_mask,
264                        struct mlx5_flow_data *data);
265         /** Size in bytes of the destination structure. */
266         const unsigned int dst_sz;
267         /** List of possible following items.  */
268         const enum rte_flow_item_type *const items;
269 };
270
271 /** Valid actions for this PMD. */
272 static const enum rte_flow_action_type valid_actions[] = {
273         RTE_FLOW_ACTION_TYPE_DROP,
274         RTE_FLOW_ACTION_TYPE_QUEUE,
275         RTE_FLOW_ACTION_TYPE_MARK,
276         RTE_FLOW_ACTION_TYPE_FLAG,
277 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
278         RTE_FLOW_ACTION_TYPE_COUNT,
279 #endif
280         RTE_FLOW_ACTION_TYPE_END,
281 };
282
283 /** Graph of supported items and associated actions. */
284 static const struct mlx5_flow_items mlx5_flow_items[] = {
285         [RTE_FLOW_ITEM_TYPE_END] = {
286                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
287                                RTE_FLOW_ITEM_TYPE_VXLAN),
288         },
289         [RTE_FLOW_ITEM_TYPE_ETH] = {
290                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
291                                RTE_FLOW_ITEM_TYPE_IPV4,
292                                RTE_FLOW_ITEM_TYPE_IPV6),
293                 .actions = valid_actions,
294                 .mask = &(const struct rte_flow_item_eth){
295                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297                         .type = -1,
298                 },
299                 .default_mask = &rte_flow_item_eth_mask,
300                 .mask_sz = sizeof(struct rte_flow_item_eth),
301                 .convert = mlx5_flow_create_eth,
302                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
303         },
304         [RTE_FLOW_ITEM_TYPE_VLAN] = {
305                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
306                                RTE_FLOW_ITEM_TYPE_IPV6),
307                 .actions = valid_actions,
308                 .mask = &(const struct rte_flow_item_vlan){
309                         .tci = -1,
310                         .inner_type = -1,
311                 },
312                 .default_mask = &rte_flow_item_vlan_mask,
313                 .mask_sz = sizeof(struct rte_flow_item_vlan),
314                 .convert = mlx5_flow_create_vlan,
315                 .dst_sz = 0,
316         },
317         [RTE_FLOW_ITEM_TYPE_IPV4] = {
318                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
319                                RTE_FLOW_ITEM_TYPE_TCP),
320                 .actions = valid_actions,
321                 .mask = &(const struct rte_flow_item_ipv4){
322                         .hdr = {
323                                 .src_addr = -1,
324                                 .dst_addr = -1,
325                                 .type_of_service = -1,
326                                 .next_proto_id = -1,
327                         },
328                 },
329                 .default_mask = &rte_flow_item_ipv4_mask,
330                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
331                 .convert = mlx5_flow_create_ipv4,
332                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
333         },
334         [RTE_FLOW_ITEM_TYPE_IPV6] = {
335                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
336                                RTE_FLOW_ITEM_TYPE_TCP),
337                 .actions = valid_actions,
338                 .mask = &(const struct rte_flow_item_ipv6){
339                         .hdr = {
340                                 .src_addr = {
341                                         0xff, 0xff, 0xff, 0xff,
342                                         0xff, 0xff, 0xff, 0xff,
343                                         0xff, 0xff, 0xff, 0xff,
344                                         0xff, 0xff, 0xff, 0xff,
345                                 },
346                                 .dst_addr = {
347                                         0xff, 0xff, 0xff, 0xff,
348                                         0xff, 0xff, 0xff, 0xff,
349                                         0xff, 0xff, 0xff, 0xff,
350                                         0xff, 0xff, 0xff, 0xff,
351                                 },
352                                 .vtc_flow = -1,
353                                 .proto = -1,
354                                 .hop_limits = -1,
355                         },
356                 },
357                 .default_mask = &rte_flow_item_ipv6_mask,
358                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
359                 .convert = mlx5_flow_create_ipv6,
360                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
361         },
362         [RTE_FLOW_ITEM_TYPE_UDP] = {
363                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
364                 .actions = valid_actions,
365                 .mask = &(const struct rte_flow_item_udp){
366                         .hdr = {
367                                 .src_port = -1,
368                                 .dst_port = -1,
369                         },
370                 },
371                 .default_mask = &rte_flow_item_udp_mask,
372                 .mask_sz = sizeof(struct rte_flow_item_udp),
373                 .convert = mlx5_flow_create_udp,
374                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
375         },
376         [RTE_FLOW_ITEM_TYPE_TCP] = {
377                 .actions = valid_actions,
378                 .mask = &(const struct rte_flow_item_tcp){
379                         .hdr = {
380                                 .src_port = -1,
381                                 .dst_port = -1,
382                         },
383                 },
384                 .default_mask = &rte_flow_item_tcp_mask,
385                 .mask_sz = sizeof(struct rte_flow_item_tcp),
386                 .convert = mlx5_flow_create_tcp,
387                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
388         },
389         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
390                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
391                 .actions = valid_actions,
392                 .mask = &(const struct rte_flow_item_vxlan){
393                         .vni = "\xff\xff\xff",
394                 },
395                 .default_mask = &rte_flow_item_vxlan_mask,
396                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
397                 .convert = mlx5_flow_create_vxlan,
398                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
399         },
400 };
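/*
 * Example: the pattern eth / ipv4 / udp / vxlan / eth / ipv4 / tcp is
 * accepted by walking this graph (END -> ETH -> IPV4 -> UDP -> VXLAN ->
 * ETH -> IPV4 -> TCP), while eth / tcp is rejected because TCP does not
 * appear in the .items list of the ETH entry.
 */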
401
402 /** Structure to pass to the conversion function. */
403 struct mlx5_flow_parse {
404         uint32_t inner; /**< Set once VXLAN is encountered. */
405         uint32_t create:1;
406         /**< Whether resources should remain after a validate. */
407         uint32_t drop:1; /**< Target is a drop queue. */
408         uint32_t mark:1; /**< Mark is present in the flow. */
409         uint32_t count:1; /**< Count is present in the flow. */
410         uint32_t mark_id; /**< Mark identifier. */
411         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
412         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
413         uint8_t rss_key[40]; /**< copy of the RSS key. */
414         enum hash_rxq_type layer; /**< Last pattern layer detected. */
415         struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
416         struct {
417                 struct ibv_flow_attr *ibv_attr;
418                 /**< Pointer to Verbs attributes. */
419                 unsigned int offset;
420                 /**< Current position or total size of the attribute. */
421         } queue[RTE_DIM(hash_rxq_init)];
422 };
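/*
 * parser->queue[] holds one Verbs attribute per hash Rx queue type.  Its
 * offset field is used in two passes: mlx5_flow_convert_items_validate()
 * accumulates the total specification size needed, then mlx5_flow_convert()
 * resets it to sizeof(struct ibv_flow_attr) and the conversion callbacks use
 * it as the current write position while appending specifications.
 */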
423
424 static const struct rte_flow_ops mlx5_flow_ops = {
425         .validate = mlx5_flow_validate,
426         .create = mlx5_flow_create,
427         .destroy = mlx5_flow_destroy,
428         .flush = mlx5_flow_flush,
429 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
430         .query = mlx5_flow_query,
431 #else
432         .query = NULL,
433 #endif
434         .isolate = mlx5_flow_isolate,
435 };
436
437 /* Convert FDIR request to Generic flow. */
438 struct mlx5_fdir {
439         struct rte_flow_attr attr;
440         struct rte_flow_action actions[2];
441         struct rte_flow_item items[4];
442         struct rte_flow_item_eth l2;
443         struct rte_flow_item_eth l2_mask;
444         union {
445                 struct rte_flow_item_ipv4 ipv4;
446                 struct rte_flow_item_ipv6 ipv6;
447         } l3;
448         union {
449                 struct rte_flow_item_ipv4 ipv4;
450                 struct rte_flow_item_ipv6 ipv6;
451         } l3_mask;
452         union {
453                 struct rte_flow_item_udp udp;
454                 struct rte_flow_item_tcp tcp;
455         } l4;
456         union {
457                 struct rte_flow_item_udp udp;
458                 struct rte_flow_item_tcp tcp;
459         } l4_mask;
460         struct rte_flow_action_queue queue;
461 };
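/*
 * A flow director (FDIR) filter is translated into this structure: the attr,
 * at most four items (Ethernet, L3, L4, END) and two actions (queue, END),
 * which is presumably then handled by the same rte_flow conversion path as
 * regular flows.
 */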
462
463 /* Verbs specification header. */
464 struct ibv_spec_header {
465         enum ibv_flow_spec_type type;
466         uint16_t size;
467 };
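/*
 * Every ibv_flow_spec_* structure starts with the same two fields (type and
 * 16-bit size); this local mirror of that layout lets the code walk
 * specifications generically inside an ibv_flow_attr.
 */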
468
469 /**
470  * Check support for a given item.
471  *
472  * @param item[in]
473  *   Item specification.
474  * @param mask[in]
475  *   Bit-masks covering supported fields to compare with spec, last and mask in
476  *   \item.
477  * @param size
478  *   Bit-Mask size in bytes.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 static int
484 mlx5_flow_item_validate(const struct rte_flow_item *item,
485                         const uint8_t *mask, unsigned int size)
486 {
487         if (!item->spec && (item->mask || item->last)) {
488                 rte_errno = EINVAL;
489                 return -rte_errno;
490         }
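        /* Without a user mask, spec may only set bits the supported mask covers. */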
491         if (item->spec && !item->mask) {
492                 unsigned int i;
493                 const uint8_t *spec = item->spec;
494
495                 for (i = 0; i < size; ++i)
496                         if ((spec[i] | mask[i]) != mask[i]) {
497                                 rte_errno = EINVAL;
498                                 return -rte_errno;
499                         }
500         }
501         if (item->last && !item->mask) {
502                 unsigned int i;
503                 const uint8_t *spec = item->last;
504
505                 for (i = 0; i < size; ++i)
506                         if ((spec[i] | mask[i]) != mask[i]) {
507                                 rte_errno = EINVAL;
508                                 return -rte_errno;
509                         }
510         }
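        /* A user-provided mask may only request bits the supported mask covers. */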
511         if (item->mask) {
512                 unsigned int i;
513                 const uint8_t *spec = item->mask;
514
515                 for (i = 0; i < size; ++i)
516                         if ((spec[i] | mask[i]) != mask[i]) {
517                                 rte_errno = EINVAL;
518                                 return -rte_errno;
519                         }
520         }
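        /*
         * When both spec and last are given, they must be identical under the
         * applied mask: true ranges are not supported by this parser.
         */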
521         if (item->spec && item->last) {
522                 uint8_t spec[size];
523                 uint8_t last[size];
524                 const uint8_t *apply = mask;
525                 unsigned int i;
526                 int ret;
527
528                 if (item->mask)
529                         apply = item->mask;
530                 for (i = 0; i < size; ++i) {
531                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
532                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
533                 }
534                 ret = memcmp(spec, last, size);
535                 if (ret != 0) {
536                         rte_errno = EINVAL;
537                         return -rte_errno;
538                 }
539         }
540         return 0;
541 }
542
543 /**
544  * Validate flow rule attributes.
545  *
546  * @param[in] attr
547  *   Flow rule attributes.
548  * @param[out] error
549  *   Perform verbose error reporting if not NULL.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
556                              struct rte_flow_error *error)
557 {
558         if (attr->group) {
559                 rte_flow_error_set(error, ENOTSUP,
560                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
561                                    NULL,
562                                    "groups are not supported");
563                 return -rte_errno;
564         }
565         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
566                 rte_flow_error_set(error, ENOTSUP,
567                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
568                                    NULL,
569                                    "priorities are not supported");
570                 return -rte_errno;
571         }
572         if (attr->egress) {
573                 rte_flow_error_set(error, ENOTSUP,
574                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
575                                    NULL,
576                                    "egress is not supported");
577                 return -rte_errno;
578         }
579         if (attr->transfer) {
580                 rte_flow_error_set(error, ENOTSUP,
581                                    RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
582                                    NULL,
583                                    "transfer is not supported");
584                 return -rte_errno;
585         }
586         if (!attr->ingress) {
587                 rte_flow_error_set(error, ENOTSUP,
588                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
589                                    NULL,
590                                    "only ingress is supported");
591                 return -rte_errno;
592         }
593         return 0;
594 }
595
596 /**
597  * Extract the requested actions into the parser.
598  *
599  * @param dev
600  *   Pointer to Ethernet device.
601  * @param[in] actions
602  *   Associated actions (list terminated by the END action).
603  * @param[out] error
604  *   Perform verbose error reporting if not NULL.
605  * @param[in, out] parser
606  *   Internal parser structure.
607  *
608  * @return
609  *   0 on success, a negative errno value otherwise and rte_errno is set.
610  */
611 static int
612 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
613                           const struct rte_flow_action actions[],
614                           struct rte_flow_error *error,
615                           struct mlx5_flow_parse *parser)
616 {
617         enum { FATE = 1, MARK = 2, COUNT = 4, };
618         uint32_t overlap = 0;
619         struct priv *priv = dev->data->dev_private;
620
621         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
622                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
623                         continue;
624                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
625                         if (overlap & FATE)
626                                 goto exit_action_overlap;
627                         overlap |= FATE;
628                         parser->drop = 1;
629                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
630                         const struct rte_flow_action_queue *queue =
631                                 (const struct rte_flow_action_queue *)
632                                 actions->conf;
633
634                         if (overlap & FATE)
635                                 goto exit_action_overlap;
636                         overlap |= FATE;
637                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
638                                 goto exit_action_not_supported;
639                         parser->queues[0] = queue->index;
640                         parser->rss_conf = (struct rte_flow_action_rss){
641                                 .queue_num = 1,
642                                 .queue = parser->queues,
643                         };
644                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
645                         const struct rte_flow_action_rss *rss =
646                                 (const struct rte_flow_action_rss *)
647                                 actions->conf;
648                         const uint8_t *rss_key;
649                         uint32_t rss_key_len;
650                         uint16_t n;
651
652                         if (overlap & FATE)
653                                 goto exit_action_overlap;
654                         overlap |= FATE;
655                         if (rss->func &&
656                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
657                                 rte_flow_error_set(error, EINVAL,
658                                                    RTE_FLOW_ERROR_TYPE_ACTION,
659                                                    actions,
660                                                    "the only supported RSS hash"
661                                                    " function is Toeplitz");
662                                 return -rte_errno;
663                         }
664                         if (rss->level) {
665                                 rte_flow_error_set(error, EINVAL,
666                                                    RTE_FLOW_ERROR_TYPE_ACTION,
667                                                    actions,
668                                                    "a nonzero RSS encapsulation"
669                                                    " level is not supported");
670                                 return -rte_errno;
671                         }
672                         if (rss->types & MLX5_RSS_HF_MASK) {
673                                 rte_flow_error_set(error, EINVAL,
674                                                    RTE_FLOW_ERROR_TYPE_ACTION,
675                                                    actions,
676                                                    "unsupported RSS type"
677                                                    " requested");
678                                 return -rte_errno;
679                         }
680                         if (rss->key_len) {
681                                 rss_key_len = rss->key_len;
682                                 rss_key = rss->key;
683                         } else {
684                                 rss_key_len = rss_hash_default_key_len;
685                                 rss_key = rss_hash_default_key;
686                         }
687                         if (rss_key_len != RTE_DIM(parser->rss_key)) {
688                                 rte_flow_error_set(error, EINVAL,
689                                                    RTE_FLOW_ERROR_TYPE_ACTION,
690                                                    actions,
691                                                    "RSS hash key must be"
692                                                    " exactly 40 bytes long");
693                                 return -rte_errno;
694                         }
695                         if (!rss->queue_num) {
696                                 rte_flow_error_set(error, EINVAL,
697                                                    RTE_FLOW_ERROR_TYPE_ACTION,
698                                                    actions,
699                                                    "no valid queues");
700                                 return -rte_errno;
701                         }
702                         if (rss->queue_num > RTE_DIM(parser->queues)) {
703                                 rte_flow_error_set(error, EINVAL,
704                                                    RTE_FLOW_ERROR_TYPE_ACTION,
705                                                    actions,
706                                                    "too many queues for RSS"
707                                                    " context");
708                                 return -rte_errno;
709                         }
710                         for (n = 0; n < rss->queue_num; ++n) {
711                                 if (rss->queue[n] >= priv->rxqs_n) {
712                                         rte_flow_error_set(error, EINVAL,
713                                                    RTE_FLOW_ERROR_TYPE_ACTION,
714                                                    actions,
715                                                    "queue id > number of"
716                                                    " queues");
717                                         return -rte_errno;
718                                 }
719                         }
720                         parser->rss_conf = (struct rte_flow_action_rss){
721                                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
722                                 .level = 0,
723                                 .types = rss->types,
724                                 .key_len = rss_key_len,
725                                 .queue_num = rss->queue_num,
726                                 .key = memcpy(parser->rss_key, rss_key,
727                                               sizeof(*rss_key) * rss_key_len),
728                                 .queue = memcpy(parser->queues, rss->queue,
729                                                 sizeof(*rss->queue) *
730                                                 rss->queue_num),
731                         };
732                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
733                         const struct rte_flow_action_mark *mark =
734                                 (const struct rte_flow_action_mark *)
735                                 actions->conf;
736
737                         if (overlap & MARK)
738                                 goto exit_action_overlap;
739                         overlap |= MARK;
740                         if (!mark) {
741                                 rte_flow_error_set(error, EINVAL,
742                                                    RTE_FLOW_ERROR_TYPE_ACTION,
743                                                    actions,
744                                                    "mark must be defined");
745                                 return -rte_errno;
746                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
747                                 rte_flow_error_set(error, ENOTSUP,
748                                                    RTE_FLOW_ERROR_TYPE_ACTION,
749                                                    actions,
750                                                    "mark must be between 0"
751                                                    " and 16777199");
752                                 return -rte_errno;
753                         }
754                         parser->mark = 1;
755                         parser->mark_id = mark->id;
756                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
757                         if (overlap & MARK)
758                                 goto exit_action_overlap;
759                         overlap |= MARK;
760                         parser->mark = 1;
761                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
762                            priv->config.flow_counter_en) {
763                         if (overlap & COUNT)
764                                 goto exit_action_overlap;
765                         overlap |= COUNT;
766                         parser->count = 1;
767                 } else {
768                         goto exit_action_not_supported;
769                 }
770         }
771         /* When fate is unknown, drop traffic. */
772         if (!(overlap & FATE))
773                 parser->drop = 1;
774         if (parser->drop && parser->mark)
775                 parser->mark = 0;
776         if (!parser->rss_conf.queue_num && !parser->drop) {
777                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
778                                    NULL, "no valid action");
779                 return -rte_errno;
780         }
781         return 0;
782 exit_action_not_supported:
783         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
784                            actions, "action not supported");
785         return -rte_errno;
786 exit_action_overlap:
787         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
788                            actions, "overlapping actions are not supported");
789         return -rte_errno;
790 }
791
792 /**
793  * Validate items.
794  *
795  * @param[in] items
796  *   Pattern specification (list terminated by the END pattern item).
797  * @param[out] error
798  *   Perform verbose error reporting if not NULL.
799  * @param[in, out] parser
800  *   Internal parser structure.
801  *
802  * @return
803  *   0 on success, a negative errno value otherwise and rte_errno is set.
804  */
805 static int
806 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
807                                  struct rte_flow_error *error,
808                                  struct mlx5_flow_parse *parser)
809 {
810         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
811         unsigned int i;
812         int ret = 0;
813
814         /* Initialise the offsets to start after verbs attribute. */
815         for (i = 0; i != hash_rxq_init_n; ++i)
816                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
817         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
818                 const struct mlx5_flow_items *token = NULL;
819                 unsigned int n;
820
821                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
822                         continue;
823                 for (i = 0;
824                      cur_item->items &&
825                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
826                      ++i) {
827                         if (cur_item->items[i] == items->type) {
828                                 token = &mlx5_flow_items[items->type];
829                                 break;
830                         }
831                 }
832                 if (!token) {
833                         ret = -ENOTSUP;
834                         goto exit_item_not_supported;
835                 }
836                 cur_item = token;
837                 ret = mlx5_flow_item_validate(items,
838                                               (const uint8_t *)cur_item->mask,
839                                               cur_item->mask_sz);
840                 if (ret)
841                         goto exit_item_not_supported;
842                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
843                         if (parser->inner) {
844                                 rte_flow_error_set(error, ENOTSUP,
845                                                    RTE_FLOW_ERROR_TYPE_ITEM,
846                                                    items,
847                                                    "cannot recognize multiple"
848                                                    " VXLAN encapsulations");
849                                 return -rte_errno;
850                         }
851                         parser->inner = IBV_FLOW_SPEC_INNER;
852                 }
853                 if (parser->drop) {
854                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
855                 } else {
856                         for (n = 0; n != hash_rxq_init_n; ++n)
857                                 parser->queue[n].offset += cur_item->dst_sz;
858                 }
859         }
860         if (parser->drop) {
861                 parser->queue[HASH_RXQ_ETH].offset +=
862                         sizeof(struct ibv_flow_spec_action_drop);
863         }
864         if (parser->mark) {
865                 for (i = 0; i != hash_rxq_init_n; ++i)
866                         parser->queue[i].offset +=
867                                 sizeof(struct ibv_flow_spec_action_tag);
868         }
869         if (parser->count) {
870                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
871
872                 for (i = 0; i != hash_rxq_init_n; ++i)
873                         parser->queue[i].offset += size;
874         }
875         return 0;
876 exit_item_not_supported:
877         return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
878                                   items, "item not supported");
879 }
880
881 /**
882  * Allocate memory space to store verbs flow attributes.
883  *
884  * @param[in] size
885  *   Number of bytes to allocate.
886  * @param[out] error
887  *   Perform verbose error reporting if not NULL.
888  *
889  * @return
890  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
891  */
892 static struct ibv_flow_attr *
893 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
894 {
895         struct ibv_flow_attr *ibv_attr;
896
897         ibv_attr = rte_calloc(__func__, 1, size, 0);
898         if (!ibv_attr) {
899                 rte_flow_error_set(error, ENOMEM,
900                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
901                                    NULL,
902                                    "cannot allocate verbs spec attributes");
903                 return NULL;
904         }
905         return ibv_attr;
906 }
907
908 /**
909  * Give inner packet matching a higher priority than non-inner packet
910  * matching.
911  *
912  * @param dev
913  *   Pointer to Ethernet device.
914  * @param[in, out] parser
915  *   Internal parser structure.
916  * @param attr
917  *   User flow attribute.
918  */
919 static void
920 mlx5_flow_update_priority(struct rte_eth_dev *dev,
921                           struct mlx5_flow_parse *parser,
922                           const struct rte_flow_attr *attr)
923 {
924         struct priv *priv = dev->data->dev_private;
925         unsigned int i;
926         uint16_t priority;
927
928         /*                      8 priorities    >= 16 priorities
929          * Control flow:        4-7             8-15
930          * User normal flow:    1-3             4-7
931          * User tunnel flow:    0-2             0-3
932          */
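        /*
         * Example with 16 Verbs priorities: a non-tunnel flow created with
         * attr->priority == 0 starts from 0 * 8, gets +4 for being an outer
         * match, and ends up in the 4-6 range once the per-layer offset from
         * hash_rxq_init[] (0-2) is added below.
         */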
933         priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
934         if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
935                 priority /= 2;
936         /*
937          * Lower the Verbs priority of non-tunnel flows by 1 when only 8
938          * Verbs priorities are supported, by 4 otherwise.
939          */
940         if (!parser->inner) {
941                 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
942                         priority += 1;
943                 else
944                         priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
945         }
946         if (parser->drop) {
947                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
948                                 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
949                 return;
950         }
951         for (i = 0; i != hash_rxq_init_n; ++i) {
952                 if (!parser->queue[i].ibv_attr)
953                         continue;
954                 parser->queue[i].ibv_attr->priority = priority +
955                                 hash_rxq_init[i].flow_priority;
956         }
957 }
958
959 /**
960  * Finalise verbs flow attributes.
961  *
962  * @param[in, out] parser
963  *   Internal parser structure.
964  */
965 static void
966 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
967 {
968         const unsigned int ipv4 =
969                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
970         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
971         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
972         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
973         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
974         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
975         unsigned int i;
976
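        /*
         * The [hmin, hmax] and [ohmin, ohmax] ranges rely on the declaration
         * order of enum hash_rxq_type, where the TCP, UDP and IP entries of
         * each address family are contiguous.
         */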
977         /* Remove any other flow not matching the pattern. */
978         if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
979                 for (i = 0; i != hash_rxq_init_n; ++i) {
980                         if (i == HASH_RXQ_ETH)
981                                 continue;
982                         rte_free(parser->queue[i].ibv_attr);
983                         parser->queue[i].ibv_attr = NULL;
984                 }
985                 return;
986         }
987         if (parser->layer == HASH_RXQ_ETH) {
988                 goto fill;
989         } else {
990                 /*
991                  * This layer becomes useless as the pattern defines upper
992                  * layers.
993                  */
994                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
995                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
996         }
997         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
998         for (i = ohmin; i != (ohmax + 1); ++i) {
999                 if (!parser->queue[i].ibv_attr)
1000                         continue;
1001                 rte_free(parser->queue[i].ibv_attr);
1002                 parser->queue[i].ibv_attr = NULL;
1003         }
1004         /* Remove impossible flow according to the RSS configuration. */
1005         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
1006             parser->rss_conf.types) {
1007                 /* Remove any other flow. */
1008                 for (i = hmin; i != (hmax + 1); ++i) {
1009                         if ((i == parser->layer) ||
1010                              (!parser->queue[i].ibv_attr))
1011                                 continue;
1012                         rte_free(parser->queue[i].ibv_attr);
1013                         parser->queue[i].ibv_attr = NULL;
1014                 }
1015         } else if (!parser->queue[ip].ibv_attr) {
1016                 /* No RSS is possible with the current configuration. */
1017                 parser->rss_conf.queue_num = 1;
1018                 return;
1019         }
1020 fill:
1021         /*
1022          * Fill missing layers in verbs specifications, or compute the correct
1023          * offset to allocate the memory space for the attributes and
1024          * specifications.
1025          */
1026         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1027                 union {
1028                         struct ibv_flow_spec_ipv4_ext ipv4;
1029                         struct ibv_flow_spec_ipv6 ipv6;
1030                         struct ibv_flow_spec_tcp_udp udp_tcp;
1031                 } specs;
1032                 void *dst;
1033                 uint16_t size;
1034
1035                 if (i == parser->layer)
1036                         continue;
1037                 if (parser->layer == HASH_RXQ_ETH) {
1038                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1039                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1040                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1041                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
1042                                         .size = size,
1043                                 };
1044                         } else {
1045                                 size = sizeof(struct ibv_flow_spec_ipv6);
1046                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1047                                         .type = IBV_FLOW_SPEC_IPV6,
1048                                         .size = size,
1049                                 };
1050                         }
1051                         if (parser->queue[i].ibv_attr) {
1052                                 dst = (void *)((uintptr_t)
1053                                                parser->queue[i].ibv_attr +
1054                                                parser->queue[i].offset);
1055                                 memcpy(dst, &specs, size);
1056                                 ++parser->queue[i].ibv_attr->num_of_specs;
1057                         }
1058                         parser->queue[i].offset += size;
1059                 }
1060                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1061                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1062                         size = sizeof(struct ibv_flow_spec_tcp_udp);
1063                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1064                                 .type = ((i == HASH_RXQ_UDPV4 ||
1065                                           i == HASH_RXQ_UDPV6) ?
1066                                          IBV_FLOW_SPEC_UDP :
1067                                          IBV_FLOW_SPEC_TCP),
1068                                 .size = size,
1069                         };
1070                         if (parser->queue[i].ibv_attr) {
1071                                 dst = (void *)((uintptr_t)
1072                                                parser->queue[i].ibv_attr +
1073                                                parser->queue[i].offset);
1074                                 memcpy(dst, &specs, size);
1075                                 ++parser->queue[i].ibv_attr->num_of_specs;
1076                         }
1077                         parser->queue[i].offset += size;
1078                 }
1079         }
1080 }
1081
1082 /**
1083  * Validate and convert a flow supported by the NIC.
1084  *
1085  * @param dev
1086  *   Pointer to Ethernet device.
1087  * @param[in] attr
1088  *   Flow rule attributes.
1089  * @param[in] pattern
1090  *   Pattern specification (list terminated by the END pattern item).
1091  * @param[in] actions
1092  *   Associated actions (list terminated by the END action).
1093  * @param[out] error
1094  *   Perform verbose error reporting if not NULL.
1095  * @param[in, out] parser
1096  *   Internal parser structure.
1097  *
1098  * @return
1099  *   0 on success, a negative errno value otherwise and rte_errno is set.
1100  */
1101 static int
1102 mlx5_flow_convert(struct rte_eth_dev *dev,
1103                   const struct rte_flow_attr *attr,
1104                   const struct rte_flow_item items[],
1105                   const struct rte_flow_action actions[],
1106                   struct rte_flow_error *error,
1107                   struct mlx5_flow_parse *parser)
1108 {
1109         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1110         unsigned int i;
1111         int ret;
1112
1113         /* First step. Validate the attributes, items and actions. */
1114         *parser = (struct mlx5_flow_parse){
1115                 .create = parser->create,
1116                 .layer = HASH_RXQ_ETH,
1117                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1118         };
1119         ret = mlx5_flow_convert_attributes(attr, error);
1120         if (ret)
1121                 return ret;
1122         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1123         if (ret)
1124                 return ret;
1125         ret = mlx5_flow_convert_items_validate(items, error, parser);
1126         if (ret)
1127                 return ret;
1128         mlx5_flow_convert_finalise(parser);
1129         /*
1130          * Second step.
1131          * Allocate the memory space to store verbs specifications.
1132          */
1133         if (parser->drop) {
1134                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1135
1136                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1137                         mlx5_flow_convert_allocate(offset, error);
1138                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1139                         goto exit_enomem;
1140                 parser->queue[HASH_RXQ_ETH].offset =
1141                         sizeof(struct ibv_flow_attr);
1142         } else {
1143                 for (i = 0; i != hash_rxq_init_n; ++i) {
1144                         unsigned int offset;
1145
1146                         if (!(parser->rss_conf.types &
1147                               hash_rxq_init[i].dpdk_rss_hf) &&
1148                             (i != HASH_RXQ_ETH))
1149                                 continue;
1150                         offset = parser->queue[i].offset;
1151                         parser->queue[i].ibv_attr =
1152                                 mlx5_flow_convert_allocate(offset, error);
1153                         if (!parser->queue[i].ibv_attr)
1154                                 goto exit_enomem;
1155                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1156                 }
1157         }
1158         /* Third step. Conversion parse, fill the specifications. */
1159         parser->inner = 0;
1160         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1161                 struct mlx5_flow_data data = {
1162                         .parser = parser,
1163                         .error = error,
1164                 };
1165
1166                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1167                         continue;
1168                 cur_item = &mlx5_flow_items[items->type];
1169                 ret = cur_item->convert(items,
1170                                         (cur_item->default_mask ?
1171                                          cur_item->default_mask :
1172                                          cur_item->mask),
1173                                          &data);
1174                 if (ret)
1175                         goto exit_free;
1176         }
1177         if (parser->mark)
1178                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1179         if (parser->count && parser->create) {
1180                 mlx5_flow_create_count(dev, parser);
1181                 if (!parser->cs)
1182                         goto exit_count_error;
1183         }
1184         /*
1185          * Last step. Complete missing specification to reach the RSS
1186          * configuration.
1187          */
1188         if (!parser->drop)
1189                 mlx5_flow_convert_finalise(parser);
1190         mlx5_flow_update_priority(dev, parser, attr);
1191 exit_free:
1192         /* Only verification is expected, all resources should be released. */
1193         if (!parser->create) {
1194                 for (i = 0; i != hash_rxq_init_n; ++i) {
1195                         if (parser->queue[i].ibv_attr) {
1196                                 rte_free(parser->queue[i].ibv_attr);
1197                                 parser->queue[i].ibv_attr = NULL;
1198                         }
1199                 }
1200         }
1201         return ret;
1202 exit_enomem:
1203         for (i = 0; i != hash_rxq_init_n; ++i) {
1204                 if (parser->queue[i].ibv_attr) {
1205                         rte_free(parser->queue[i].ibv_attr);
1206                         parser->queue[i].ibv_attr = NULL;
1207                 }
1208         }
1209         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1210                            NULL, "cannot allocate verbs spec attributes");
1211         return -rte_errno;
1212 exit_count_error:
1213         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1214                            NULL, "cannot create counter");
1215         return -rte_errno;
1216 }
1217
1218 /**
1219  * Copy the created specification into every active Verbs flow attribute.
1220  *
1221  * @param parser
1222  *   Internal parser structure.
1223  * @param src
1224  *   Create specification.
1225  *   Created specification.
1226  *   Size in bytes of the specification to copy.
1227  */
1228 static void
1229 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1230                       unsigned int size)
1231 {
1232         unsigned int i;
1233         void *dst;
1234
1235         for (i = 0; i != hash_rxq_init_n; ++i) {
1236                 if (!parser->queue[i].ibv_attr)
1237                         continue;
1238                 /* Specification must be the same l3 type or none. */
1239                 if (parser->layer == HASH_RXQ_ETH ||
1240                     (hash_rxq_init[parser->layer].ip_version ==
1241                      hash_rxq_init[i].ip_version) ||
1242                     (hash_rxq_init[i].ip_version == 0)) {
1243                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1244                                         parser->queue[i].offset);
1245                         memcpy(dst, src, size);
1246                         ++parser->queue[i].ibv_attr->num_of_specs;
1247                         parser->queue[i].offset += size;
1248                 }
1249         }
1250 }
1251
1252 /**
1253  * Convert Ethernet item to Verbs specification.
1254  *
1255  * @param item[in]
1256  *   Item specification.
1257  * @param default_mask[in]
1258  *   Default bit-masks to use when item->mask is not provided.
1259  * @param data[in, out]
1260  *   User structure.
1261  *
1262  * @return
1263  *   0 on success, a negative errno value otherwise and rte_errno is set.
1264  */
1265 static int
1266 mlx5_flow_create_eth(const struct rte_flow_item *item,
1267                      const void *default_mask,
1268                      struct mlx5_flow_data *data)
1269 {
1270         const struct rte_flow_item_eth *spec = item->spec;
1271         const struct rte_flow_item_eth *mask = item->mask;
1272         struct mlx5_flow_parse *parser = data->parser;
1273         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1274         struct ibv_flow_spec_eth eth = {
1275                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1276                 .size = eth_size,
1277         };
1278
1279         /* Don't update layer for the inner pattern. */
1280         if (!parser->inner)
1281                 parser->layer = HASH_RXQ_ETH;
1282         if (spec) {
1283                 unsigned int i;
1284
1285                 if (!mask)
1286                         mask = default_mask;
1287                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1288                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1289                 eth.val.ether_type = spec->type;
1290                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1291                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1292                 eth.mask.ether_type = mask->type;
1293                 /* Remove unwanted bits from values. */
1294                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1295                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1296                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1297                 }
1298                 eth.val.ether_type &= eth.mask.ether_type;
1299         }
1300         mlx5_flow_create_copy(parser, &eth, eth_size);
1301         return 0;
1302 }
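
/*
 * Illustrative sketch (not part of the driver): with a partial destination
 * MAC mask the "val &= mask" loop above clears the unmatched byte before the
 * spec is copied, e.g.:
 *
 *   struct rte_flow_item_eth spec = {
 *           .dst.addr_bytes = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
 *   };
 *   struct rte_flow_item_eth mask = {
 *           .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 },
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &spec,
 *           .mask = &mask,
 *   };
 *
 * The Verbs spec then matches 00:11:22:33:44:xx: eth.val.dst_mac becomes
 * 00:11:22:33:44:00 under mask ff:ff:ff:ff:ff:00.
 */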
1303
1304 /**
1305  * Convert VLAN item to Verbs specification.
1306  *
1307  * @param[in] item
1308  *   Item specification.
1309  * @param[in] default_mask
1310  *   Default bit-masks to use when item->mask is not provided.
1311  * @param[in, out] data
1312  *   User structure.
1313  *
1314  * @return
1315  *   0 on success, a negative errno value otherwise and rte_errno is set.
1316  */
1317 static int
1318 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1319                       const void *default_mask,
1320                       struct mlx5_flow_data *data)
1321 {
1322         const struct rte_flow_item_vlan *spec = item->spec;
1323         const struct rte_flow_item_vlan *mask = item->mask;
1324         struct mlx5_flow_parse *parser = data->parser;
1325         struct ibv_flow_spec_eth *eth;
1326         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1327         const char *msg = "VLAN cannot be empty";
1328
1329         if (spec) {
1330                 unsigned int i;
1331                 if (!mask)
1332                         mask = default_mask;
1333
1334                 for (i = 0; i != hash_rxq_init_n; ++i) {
1335                         if (!parser->queue[i].ibv_attr)
1336                                 continue;
1337
1338                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1339                                        parser->queue[i].offset - eth_size);
1340                         eth->val.vlan_tag = spec->tci;
1341                         eth->mask.vlan_tag = mask->tci;
1342                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1343                         /*
1344                          * From the Verbs perspective, an empty VLAN match
1345                          * is equivalent to a packet without a VLAN layer.
1346                          */
1347                         if (!eth->mask.vlan_tag)
1348                                 goto error;
1349                         /* Outer TPID cannot be matched. */
1350                         if (eth->mask.ether_type) {
1351                                 msg = "VLAN TPID matching is not supported";
1352                                 goto error;
1353                         }
1354                         eth->val.ether_type = spec->inner_type;
1355                         eth->mask.ether_type = mask->inner_type;
1356                         eth->val.ether_type &= eth->mask.ether_type;
1357                 }
1358                 return 0;
1359         }
1360 error:
1361         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1362                                   item, msg);
1363 }
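
/*
 * Illustrative sketch (not part of the driver): matching VLAN ID 100 on the
 * outer header; the TCI is carried in network byte order and a fully zero
 * TCI mask is rejected by the "VLAN cannot be empty" check above:
 *
 *   struct rte_flow_item_vlan spec = {
 *           .tci = rte_cpu_to_be_16(100),
 *   };
 *   struct rte_flow_item_vlan mask = {
 *           .tci = rte_cpu_to_be_16(0x0fff),
 *   };
 *
 * The 0x0fff mask selects the VID bits only, leaving PCP/DEI unmatched; the
 * preceding ETH item must not mask ether_type, otherwise the TPID check
 * above rejects the pattern.
 */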
1364
1365 /**
1366  * Convert IPv4 item to Verbs specification.
1367  *
1368  * @param[in] item
1369  *   Item specification.
1370  * @param[in] default_mask
1371  *   Default bit-masks to use when item->mask is not provided.
1372  * @param[in, out] data
1373  *   User structure.
1374  *
1375  * @return
1376  *   0 on success, a negative errno value otherwise and rte_errno is set.
1377  */
1378 static int
1379 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1380                       const void *default_mask,
1381                       struct mlx5_flow_data *data)
1382 {
1383         const struct rte_flow_item_ipv4 *spec = item->spec;
1384         const struct rte_flow_item_ipv4 *mask = item->mask;
1385         struct mlx5_flow_parse *parser = data->parser;
1386         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1387         struct ibv_flow_spec_ipv4_ext ipv4 = {
1388                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1389                 .size = ipv4_size,
1390         };
1391
1392         /* Don't update layer for the inner pattern. */
1393         if (!parser->inner)
1394                 parser->layer = HASH_RXQ_IPV4;
1395         if (spec) {
1396                 if (!mask)
1397                         mask = default_mask;
1398                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1399                         .src_ip = spec->hdr.src_addr,
1400                         .dst_ip = spec->hdr.dst_addr,
1401                         .proto = spec->hdr.next_proto_id,
1402                         .tos = spec->hdr.type_of_service,
1403                 };
1404                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1405                         .src_ip = mask->hdr.src_addr,
1406                         .dst_ip = mask->hdr.dst_addr,
1407                         .proto = mask->hdr.next_proto_id,
1408                         .tos = mask->hdr.type_of_service,
1409                 };
1410                 /* Remove unwanted bits from values. */
1411                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1412                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1413                 ipv4.val.proto &= ipv4.mask.proto;
1414                 ipv4.val.tos &= ipv4.mask.tos;
1415         }
1416         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1417         return 0;
1418 }
1419
1420 /**
1421  * Convert IPv6 item to Verbs specification.
1422  *
1423  * @param[in] item
1424  *   Item specification.
1425  * @param[in] default_mask
1426  *   Default bit-masks to use when item->mask is not provided.
1427  * @param[in, out] data
1428  *   User structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_errno is set.
1432  */
1433 static int
1434 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1435                       const void *default_mask,
1436                       struct mlx5_flow_data *data)
1437 {
1438         const struct rte_flow_item_ipv6 *spec = item->spec;
1439         const struct rte_flow_item_ipv6 *mask = item->mask;
1440         struct mlx5_flow_parse *parser = data->parser;
1441         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1442         struct ibv_flow_spec_ipv6 ipv6 = {
1443                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1444                 .size = ipv6_size,
1445         };
1446
1447         /* Don't update layer for the inner pattern. */
1448         if (!parser->inner)
1449                 parser->layer = HASH_RXQ_IPV6;
1450         if (spec) {
1451                 unsigned int i;
1452                 uint32_t vtc_flow_val;
1453                 uint32_t vtc_flow_mask;
1454
1455                 if (!mask)
1456                         mask = default_mask;
1457                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1458                        RTE_DIM(ipv6.val.src_ip));
1459                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1460                        RTE_DIM(ipv6.val.dst_ip));
1461                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1462                        RTE_DIM(ipv6.mask.src_ip));
1463                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1464                        RTE_DIM(ipv6.mask.dst_ip));
1465                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1466                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1467                 ipv6.val.flow_label =
1468                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1469                                          IPV6_HDR_FL_SHIFT);
1470                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1471                                          IPV6_HDR_TC_SHIFT;
1472                 ipv6.val.next_hdr = spec->hdr.proto;
1473                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1474                 ipv6.mask.flow_label =
1475                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1476                                          IPV6_HDR_FL_SHIFT);
1477                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1478                                           IPV6_HDR_TC_SHIFT;
1479                 ipv6.mask.next_hdr = mask->hdr.proto;
1480                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1481                 /* Remove unwanted bits from values. */
1482                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1483                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1484                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1485                 }
1486                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1487                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1488                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1489                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1490         }
1491         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1492         return 0;
1493 }
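
/*
 * Illustrative sketch (not part of the driver): hdr.vtc_flow packs version,
 * traffic class and flow label into one 32-bit big-endian word. Matching
 * flow label 0x12345 only could look like this, assuming the label occupies
 * the 20 least significant bits:
 *
 *   uint32_t vtc = (6u << 28) | (0x2eu << 20) | 0x12345u;
 *   struct rte_flow_item_ipv6 spec = {
 *           .hdr.vtc_flow = rte_cpu_to_be_32(vtc),
 *   };
 *   struct rte_flow_item_ipv6 mask = {
 *           .hdr.vtc_flow = rte_cpu_to_be_32(0x000fffff),
 *   };
 *
 * The conversion above yields ipv6.val.flow_label equal to
 * rte_cpu_to_be_32(0x12345); since the mask leaves the traffic class bits
 * unset, traffic_class is cleared by the "val &= mask" statements.
 */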
1494
1495 /**
1496  * Convert UDP item to Verbs specification.
1497  *
1498  * @param[in] item
1499  *   Item specification.
1500  * @param[in] default_mask
1501  *   Default bit-masks to use when item->mask is not provided.
1502  * @param[in, out] data
1503  *   User structure.
1504  *
1505  * @return
1506  *   0 on success, a negative errno value otherwise and rte_errno is set.
1507  */
1508 static int
1509 mlx5_flow_create_udp(const struct rte_flow_item *item,
1510                      const void *default_mask,
1511                      struct mlx5_flow_data *data)
1512 {
1513         const struct rte_flow_item_udp *spec = item->spec;
1514         const struct rte_flow_item_udp *mask = item->mask;
1515         struct mlx5_flow_parse *parser = data->parser;
1516         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1517         struct ibv_flow_spec_tcp_udp udp = {
1518                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1519                 .size = udp_size,
1520         };
1521
1522         /* Don't update layer for the inner pattern. */
1523         if (!parser->inner) {
1524                 if (parser->layer == HASH_RXQ_IPV4)
1525                         parser->layer = HASH_RXQ_UDPV4;
1526                 else
1527                         parser->layer = HASH_RXQ_UDPV6;
1528         }
1529         if (spec) {
1530                 if (!mask)
1531                         mask = default_mask;
1532                 udp.val.dst_port = spec->hdr.dst_port;
1533                 udp.val.src_port = spec->hdr.src_port;
1534                 udp.mask.dst_port = mask->hdr.dst_port;
1535                 udp.mask.src_port = mask->hdr.src_port;
1536                 /* Remove unwanted bits from values. */
1537                 udp.val.src_port &= udp.mask.src_port;
1538                 udp.val.dst_port &= udp.mask.dst_port;
1539         }
1540         mlx5_flow_create_copy(parser, &udp, udp_size);
1541         return 0;
1542 }
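
/*
 * Illustrative sketch (not part of the driver): L4 ports in rte_flow items
 * are kept in network byte order, so matching UDP destination port 4789
 * (VXLAN) with any source port looks like:
 *
 *   struct rte_flow_item_udp spec = {
 *           .hdr.dst_port = rte_cpu_to_be_16(4789),
 *   };
 *   struct rte_flow_item_udp mask = {
 *           .hdr.dst_port = rte_cpu_to_be_16(0xffff),
 *   };
 *
 * The source port mask is zero, so udp.val.src_port is cleared by the
 * "val &= mask" statements above and any source port matches.
 */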
1543
1544 /**
1545  * Convert TCP item to Verbs specification.
1546  *
1547  * @param[in] item
1548  *   Item specification.
1549  * @param[in] default_mask
1550  *   Default bit-masks to use when item->mask is not provided.
1551  * @param[in, out] data
1552  *   User structure.
1553  *
1554  * @return
1555  *   0 on success, a negative errno value otherwise and rte_errno is set.
1556  */
1557 static int
1558 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1559                      const void *default_mask,
1560                      struct mlx5_flow_data *data)
1561 {
1562         const struct rte_flow_item_tcp *spec = item->spec;
1563         const struct rte_flow_item_tcp *mask = item->mask;
1564         struct mlx5_flow_parse *parser = data->parser;
1565         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1566         struct ibv_flow_spec_tcp_udp tcp = {
1567                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1568                 .size = tcp_size,
1569         };
1570
1571         /* Don't update layer for the inner pattern. */
1572         if (!parser->inner) {
1573                 if (parser->layer == HASH_RXQ_IPV4)
1574                         parser->layer = HASH_RXQ_TCPV4;
1575                 else
1576                         parser->layer = HASH_RXQ_TCPV6;
1577         }
1578         if (spec) {
1579                 if (!mask)
1580                         mask = default_mask;
1581                 tcp.val.dst_port = spec->hdr.dst_port;
1582                 tcp.val.src_port = spec->hdr.src_port;
1583                 tcp.mask.dst_port = mask->hdr.dst_port;
1584                 tcp.mask.src_port = mask->hdr.src_port;
1585                 /* Remove unwanted bits from values. */
1586                 tcp.val.src_port &= tcp.mask.src_port;
1587                 tcp.val.dst_port &= tcp.mask.dst_port;
1588         }
1589         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1590         return 0;
1591 }
1592
1593 /**
1594  * Convert VXLAN item to Verbs specification.
1595  *
1596  * @param[in] item
1597  *   Item specification.
1598  * @param[in] default_mask
1599  *   Default bit-masks to use when item->mask is not provided.
1600  * @param[in, out] data
1601  *   User structure.
1602  *
1603  * @return
1604  *   0 on success, a negative errno value otherwise and rte_errno is set.
1605  */
1606 static int
1607 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1608                        const void *default_mask,
1609                        struct mlx5_flow_data *data)
1610 {
1611         const struct rte_flow_item_vxlan *spec = item->spec;
1612         const struct rte_flow_item_vxlan *mask = item->mask;
1613         struct mlx5_flow_parse *parser = data->parser;
1614         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1615         struct ibv_flow_spec_tunnel vxlan = {
1616                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1617                 .size = size,
1618         };
1619         union vni {
1620                 uint32_t vlan_id;
1621                 uint8_t vni[4];
1622         } id;
1623
1624         id.vni[0] = 0;
1625         parser->inner = IBV_FLOW_SPEC_INNER;
1626         if (spec) {
1627                 if (!mask)
1628                         mask = default_mask;
1629                 memcpy(&id.vni[1], spec->vni, 3);
1630                 vxlan.val.tunnel_id = id.vlan_id;
1631                 memcpy(&id.vni[1], mask->vni, 3);
1632                 vxlan.mask.tunnel_id = id.vlan_id;
1633                 /* Remove unwanted bits from values. */
1634                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1635         }
1636         /*
1637          * Tunnel ID 0 is equivalent to not adding a VXLAN layer. If this
1638          * is the only layer in the Verbs specification, it is interpreted
1639          * as a wildcard and all packets will match the rule. If it follows
1640          * a full stack (e.g. eth / ipv4 / udp), all packets matching the
1641          * preceding layers will also match the rule.
1642          * To avoid such a situation, VNI 0 is currently refused.
1643          */
1644         if (!vxlan.val.tunnel_id)
1645                 return rte_flow_error_set(data->error, EINVAL,
1646                                           RTE_FLOW_ERROR_TYPE_ITEM,
1647                                           item,
1648                                           "VxLAN vni cannot be 0");
1649         mlx5_flow_create_copy(parser, &vxlan, size);
1650         return 0;
1651 }
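
/*
 * Illustrative sketch (not part of the driver): the union above places the
 * 3-byte VNI behind a leading zero byte, so tunnel_id ends up carrying the
 * VNI in network byte order. Matching VNI 0x123456 exactly:
 *
 *   struct rte_flow_item_vxlan spec = {
 *           .vni = { 0x12, 0x34, 0x56 },
 *   };
 *   struct rte_flow_item_vxlan mask = {
 *           .vni = { 0xff, 0xff, 0xff },
 *   };
 *
 * The resulting tunnel_id bytes are 00 12 34 56; an all-zero VNI would be
 * rejected by the check above.
 */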
1652
1653 /**
1654  * Convert mark/flag action to Verbs specification.
1655  *
1656  * @param parser
1657  *   Internal parser structure.
1658  * @param mark_id
1659  *   Mark identifier.
1660  *
1661  * @return
1662  *   0 on success, a negative errno value otherwise and rte_errno is set.
1663  */
1664 static int
1665 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1666 {
1667         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1668         struct ibv_flow_spec_action_tag tag = {
1669                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1670                 .size = size,
1671                 .tag_id = mlx5_flow_mark_set(mark_id),
1672         };
1673
1674         assert(parser->mark);
1675         mlx5_flow_create_copy(parser, &tag, size);
1676         return 0;
1677 }
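
/*
 * Illustrative sketch (not part of the driver): at the rte_flow level the
 * tag specification above comes from the MARK (or FLAG) action, e.g. to
 * stamp matching packets with mbuf->hash.fdir.hi == 42 (queue_conf is a
 * caller-provided rte_flow_action_queue, shown only for completeness):
 *
 *   struct rte_flow_action_mark mark = { .id = 42 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * The mark value is read back from the mbuf when the PKT_RX_FDIR_ID flag
 * is set.
 */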
1678
1679 /**
1680  * Convert count action to Verbs specification.
1681  *
1682  * @param dev
1683  *   Pointer to Ethernet device.
1684  * @param parser
1685  *   Pointer to MLX5 flow parser structure.
1686  *
1687  * @return
1688  *   0 on success, a negative errno value otherwise and rte_errno is set.
1689  */
1690 static int
1691 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1692                        struct mlx5_flow_parse *parser __rte_unused)
1693 {
1694 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1695         struct priv *priv = dev->data->dev_private;
1696         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1697         struct ibv_counter_set_init_attr init_attr = {0};
1698         struct ibv_flow_spec_counter_action counter = {
1699                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1700                 .size = size,
1701                 .counter_set_handle = 0,
1702         };
1703
1704         init_attr.counter_set_id = 0;
1705         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1706         if (!parser->cs) {
1707                 rte_errno = EINVAL;
1708                 return -rte_errno;
1709         }
1710         counter.counter_set_handle = parser->cs->handle;
1711         mlx5_flow_create_copy(parser, &counter, size);
1712 #endif
1713         return 0;
1714 }
1715
1716 /**
1717  * Complete flow rule creation with a drop queue.
1718  *
1719  * @param dev
1720  *   Pointer to Ethernet device.
1721  * @param parser
1722  *   Internal parser structure.
1723  * @param flow
1724  *   Pointer to the rte_flow.
1725  * @param[out] error
1726  *   Perform verbose error reporting if not NULL.
1727  *
1728  * @return
1729  *   0 on success, a negative errno value otherwise and rte_errno is set.
1730  */
1731 static int
1732 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1733                                    struct mlx5_flow_parse *parser,
1734                                    struct rte_flow *flow,
1735                                    struct rte_flow_error *error)
1736 {
1737         struct priv *priv = dev->data->dev_private;
1738         struct ibv_flow_spec_action_drop *drop;
1739         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1740
1741         assert(priv->pd);
1742         assert(priv->ctx);
1743         flow->drop = 1;
1744         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1745                         parser->queue[HASH_RXQ_ETH].offset);
1746         *drop = (struct ibv_flow_spec_action_drop){
1747                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1748                         .size = size,
1749         };
1750         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1751         parser->queue[HASH_RXQ_ETH].offset += size;
1752         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1753                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1754         if (parser->count)
1755                 flow->cs = parser->cs;
1756         if (!priv->dev->data->dev_started)
1757                 return 0;
1758         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1759         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1760                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1761                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1762         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1763                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1764                                    NULL, "flow rule creation failure");
1765                 goto error;
1766         }
1767         return 0;
1768 error:
1769         assert(flow);
1770         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1771                 claim_zero(mlx5_glue->destroy_flow
1772                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1773                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1774         }
1775         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1776                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1777                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1778         }
1779         if (flow->cs) {
1780                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1781                 flow->cs = NULL;
1782                 parser->cs = NULL;
1783         }
1784         return -rte_errno;
1785 }
1786
1787 /**
1788  * Create hash Rx queues when RSS is enabled.
1789  *
1790  * @param dev
1791  *   Pointer to Ethernet device.
1792  * @param parser
1793  *   Internal parser structure.
1794  * @param flow
1795  *   Pointer to the rte_flow.
1796  * @param[out] error
1797  *   Perform verbose error reporting if not NULL.
1798  *
1799  * @return
1800  *   0 on success, a negative errno value otherwise and rte_errno is set.
1801  */
1802 static int
1803 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1804                                   struct mlx5_flow_parse *parser,
1805                                   struct rte_flow *flow,
1806                                   struct rte_flow_error *error)
1807 {
1808         struct priv *priv = dev->data->dev_private;
1809         unsigned int i;
1810
1811         for (i = 0; i != hash_rxq_init_n; ++i) {
1812                 uint64_t hash_fields;
1813
1814                 if (!parser->queue[i].ibv_attr)
1815                         continue;
1816                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1817                 parser->queue[i].ibv_attr = NULL;
1818                 hash_fields = hash_rxq_init[i].hash_fields;
1819                 if (!priv->dev->data->dev_started)
1820                         continue;
1821                 flow->frxq[i].hrxq =
1822                         mlx5_hrxq_get(dev,
1823                                       parser->rss_conf.key,
1824                                       parser->rss_conf.key_len,
1825                                       hash_fields,
1826                                       parser->rss_conf.queue,
1827                                       parser->rss_conf.queue_num);
1828                 if (flow->frxq[i].hrxq)
1829                         continue;
1830                 flow->frxq[i].hrxq =
1831                         mlx5_hrxq_new(dev,
1832                                       parser->rss_conf.key,
1833                                       parser->rss_conf.key_len,
1834                                       hash_fields,
1835                                       parser->rss_conf.queue,
1836                                       parser->rss_conf.queue_num);
1837                 if (!flow->frxq[i].hrxq) {
1838                         return rte_flow_error_set(error, ENOMEM,
1839                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
1840                                                   NULL,
1841                                                   "cannot create hash rxq");
1842                 }
1843         }
1844         return 0;
1845 }
1846
1847 /**
1848  * Complete flow rule creation.
1849  *
1850  * @param dev
1851  *   Pointer to Ethernet device.
1852  * @param parser
1853  *   Internal parser structure.
1854  * @param flow
1855  *   Pointer to the rte_flow.
1856  * @param[out] error
1857  *   Perform verbose error reporting if not NULL.
1858  *
1859  * @return
1860  *   0 on success, a negative errno value otherwise and rte_errno is set.
1861  */
1862 static int
1863 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1864                               struct mlx5_flow_parse *parser,
1865                               struct rte_flow *flow,
1866                               struct rte_flow_error *error)
1867 {
1868         struct priv *priv = dev->data->dev_private;
1869         int ret;
1870         unsigned int i;
1871         unsigned int flows_n = 0;
1872
1873         assert(priv->pd);
1874         assert(priv->ctx);
1875         assert(!parser->drop);
1876         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1877         if (ret)
1878                 goto error;
1879         if (parser->count)
1880                 flow->cs = parser->cs;
1881         if (!priv->dev->data->dev_started)
1882                 return 0;
1883         for (i = 0; i != hash_rxq_init_n; ++i) {
1884                 if (!flow->frxq[i].hrxq)
1885                         continue;
1886                 flow->frxq[i].ibv_flow =
1887                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1888                                                flow->frxq[i].ibv_attr);
1889                 if (!flow->frxq[i].ibv_flow) {
1890                         rte_flow_error_set(error, ENOMEM,
1891                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1892                                            NULL, "flow rule creation failure");
1893                         goto error;
1894                 }
1895                 ++flows_n;
1896                 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1897                         dev->data->port_id,
1898                         (void *)flow, i,
1899                         (void *)flow->frxq[i].hrxq,
1900                         (void *)flow->frxq[i].ibv_flow);
1901         }
1902         if (!flows_n) {
1903                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1904                                    NULL, "internal error in flow creation");
1905                 goto error;
1906         }
1907         for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1908                 struct mlx5_rxq_data *q =
1909                         (*priv->rxqs)[parser->rss_conf.queue[i]];
1910
1911                 q->mark |= parser->mark;
1912         }
1913         return 0;
1914 error:
1915         ret = rte_errno; /* Save rte_errno before cleanup. */
1916         assert(flow);
1917         for (i = 0; i != hash_rxq_init_n; ++i) {
1918                 if (flow->frxq[i].ibv_flow) {
1919                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1920
1921                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1922                 }
1923                 if (flow->frxq[i].hrxq)
1924                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1925                 if (flow->frxq[i].ibv_attr)
1926                         rte_free(flow->frxq[i].ibv_attr);
1927         }
1928         if (flow->cs) {
1929                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1930                 flow->cs = NULL;
1931                 parser->cs = NULL;
1932         }
1933         rte_errno = ret; /* Restore rte_errno. */
1934         return -rte_errno;
1935 }
1936
1937 /**
1938  * Convert a flow.
1939  *
1940  * @param dev
1941  *   Pointer to Ethernet device.
1942  * @param list
1943  *   Pointer to a TAILQ flow list.
1944  * @param[in] attr
1945  *   Flow rule attributes.
1946  * @param[in] pattern
1947  *   Pattern specification (list terminated by the END pattern item).
1948  * @param[in] actions
1949  *   Associated actions (list terminated by the END action).
1950  * @param[out] error
1951  *   Perform verbose error reporting if not NULL.
1952  *
1953  * @return
1954  *   A flow on success, NULL otherwise and rte_errno is set.
1955  */
1956 static struct rte_flow *
1957 mlx5_flow_list_create(struct rte_eth_dev *dev,
1958                       struct mlx5_flows *list,
1959                       const struct rte_flow_attr *attr,
1960                       const struct rte_flow_item items[],
1961                       const struct rte_flow_action actions[],
1962                       struct rte_flow_error *error)
1963 {
1964         struct mlx5_flow_parse parser = { .create = 1, };
1965         struct rte_flow *flow = NULL;
1966         unsigned int i;
1967         int ret;
1968
1969         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1970         if (ret)
1971                 goto exit;
1972         flow = rte_calloc(__func__, 1,
1973                           sizeof(*flow) +
1974                           parser.rss_conf.queue_num * sizeof(uint16_t),
1975                           0);
1976         if (!flow) {
1977                 rte_flow_error_set(error, ENOMEM,
1978                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1979                                    NULL,
1980                                    "cannot allocate flow memory");
1981                 return NULL;
1982         }
1983         /* Copy configuration. */
1984         flow->queues = (uint16_t (*)[])(flow + 1);
1985         flow->rss_conf = (struct rte_flow_action_rss){
1986                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1987                 .level = 0,
1988                 .types = parser.rss_conf.types,
1989                 .key_len = parser.rss_conf.key_len,
1990                 .queue_num = parser.rss_conf.queue_num,
1991                 .key = memcpy(flow->rss_key, parser.rss_conf.key,
1992                               sizeof(*parser.rss_conf.key) *
1993                               parser.rss_conf.key_len),
1994                 .queue = memcpy(flow->queues, parser.rss_conf.queue,
1995                                 sizeof(*parser.rss_conf.queue) *
1996                                 parser.rss_conf.queue_num),
1997         };
1998         flow->mark = parser.mark;
1999         /* Finalize the flow. */
2000         if (parser.drop)
2001                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2002                                                          error);
2003         else
2004                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2005         if (ret)
2006                 goto exit;
2007         TAILQ_INSERT_TAIL(list, flow, next);
2008         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2009                 (void *)flow);
2010         return flow;
2011 exit:
2012         DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2013                 error->message);
2014         for (i = 0; i != hash_rxq_init_n; ++i) {
2015                 if (parser.queue[i].ibv_attr)
2016                         rte_free(parser.queue[i].ibv_attr);
2017         }
2018         rte_free(flow);
2019         return NULL;
2020 }
2021
2022 /**
2023  * Validate a flow supported by the NIC.
2024  *
2025  * @see rte_flow_validate()
2026  * @see rte_flow_ops
2027  */
2028 int
2029 mlx5_flow_validate(struct rte_eth_dev *dev,
2030                    const struct rte_flow_attr *attr,
2031                    const struct rte_flow_item items[],
2032                    const struct rte_flow_action actions[],
2033                    struct rte_flow_error *error)
2034 {
2035         struct mlx5_flow_parse parser = { .create = 0, };
2036
2037         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2038 }
2039
2040 /**
2041  * Create a flow.
2042  *
2043  * @see rte_flow_create()
2044  * @see rte_flow_ops
2045  */
2046 struct rte_flow *
2047 mlx5_flow_create(struct rte_eth_dev *dev,
2048                  const struct rte_flow_attr *attr,
2049                  const struct rte_flow_item items[],
2050                  const struct rte_flow_action actions[],
2051                  struct rte_flow_error *error)
2052 {
2053         struct priv *priv = dev->data->dev_private;
2054
2055         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2056                                      error);
2057 }
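
/*
 * Illustrative sketch (not part of the driver): a minimal application-side
 * use of the entry points above, steering UDP over IPv4 to Rx queue 3 on an
 * already configured and started port (port_id is assumed):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 3 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = NULL;
 *
 *   if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * Items without spec/mask act as plain protocol selectors.
 */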
2058
2059 /**
2060  * Destroy a flow in a list.
2061  *
2062  * @param dev
2063  *   Pointer to Ethernet device.
2064  * @param list
2065  *   Pointer to a TAILQ flow list.
2066  * @param[in] flow
2067  *   Flow to destroy.
2068  */
2069 static void
2070 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2071                        struct rte_flow *flow)
2072 {
2073         struct priv *priv = dev->data->dev_private;
2074         unsigned int i;
2075
2076         if (flow->drop || !flow->mark)
2077                 goto free;
2078         for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2079                 struct rte_flow *tmp;
2080                 int mark = 0;
2081
2082                 /*
2083                  * To remove the mark from the queue, the queue must not be
2084                  * present in any other marked flow (RSS or not).
2085                  */
2086                 TAILQ_FOREACH(tmp, list, next) {
2087                         unsigned int j;
2088                         uint16_t *tqs = NULL;
2089                         uint16_t tq_n = 0;
2090
2091                         if (!tmp->mark)
2092                                 continue;
2093                         for (j = 0; j != hash_rxq_init_n; ++j) {
2094                                 if (!tmp->frxq[j].hrxq)
2095                                         continue;
2096                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2097                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2098                         }
2099                         if (!tq_n)
2100                                 continue;
2101                         for (j = 0; (j != tq_n) && !mark; j++)
2102                                 if (tqs[j] == (*flow->queues)[i])
2103                                         mark = 1;
2104                 }
2105                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2106         }
2107 free:
2108         if (flow->drop) {
2109                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2110                         claim_zero(mlx5_glue->destroy_flow
2111                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2112                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2113         } else {
2114                 for (i = 0; i != hash_rxq_init_n; ++i) {
2115                         struct mlx5_flow *frxq = &flow->frxq[i];
2116
2117                         if (frxq->ibv_flow)
2118                                 claim_zero(mlx5_glue->destroy_flow
2119                                            (frxq->ibv_flow));
2120                         if (frxq->hrxq)
2121                                 mlx5_hrxq_release(dev, frxq->hrxq);
2122                         if (frxq->ibv_attr)
2123                                 rte_free(frxq->ibv_attr);
2124                 }
2125         }
2126         if (flow->cs) {
2127                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2128                 flow->cs = NULL;
2129         }
2130         TAILQ_REMOVE(list, flow, next);
2131         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2132                 (void *)flow);
2133         rte_free(flow);
2134 }
2135
2136 /**
2137  * Destroy all flows.
2138  *
2139  * @param dev
2140  *   Pointer to Ethernet device.
2141  * @param list
2142  *   Pointer to a TAILQ flow list.
2143  */
2144 void
2145 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2146 {
2147         while (!TAILQ_EMPTY(list)) {
2148                 struct rte_flow *flow;
2149
2150                 flow = TAILQ_FIRST(list);
2151                 mlx5_flow_list_destroy(dev, list, flow);
2152         }
2153 }
2154
2155 /**
2156  * Create drop queue.
2157  *
2158  * @param dev
2159  *   Pointer to Ethernet device.
2160  *
2161  * @return
2162  *   0 on success, a negative errno value otherwise and rte_errno is set.
2163  */
2164 int
2165 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2166 {
2167         struct priv *priv = dev->data->dev_private;
2168         struct mlx5_hrxq_drop *fdq = NULL;
2169
2170         assert(priv->pd);
2171         assert(priv->ctx);
2172         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2173         if (!fdq) {
2174                 DRV_LOG(WARNING,
2175                         "port %u cannot allocate memory for drop queue",
2176                         dev->data->port_id);
2177                 rte_errno = ENOMEM;
2178                 return -rte_errno;
2179         }
2180         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2181         if (!fdq->cq) {
2182                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2183                         dev->data->port_id);
2184                 rte_errno = errno;
2185                 goto error;
2186         }
2187         fdq->wq = mlx5_glue->create_wq
2188                 (priv->ctx,
2189                  &(struct ibv_wq_init_attr){
2190                         .wq_type = IBV_WQT_RQ,
2191                         .max_wr = 1,
2192                         .max_sge = 1,
2193                         .pd = priv->pd,
2194                         .cq = fdq->cq,
2195                  });
2196         if (!fdq->wq) {
2197                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2198                         dev->data->port_id);
2199                 rte_errno = errno;
2200                 goto error;
2201         }
2202         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2203                 (priv->ctx,
2204                  &(struct ibv_rwq_ind_table_init_attr){
2205                         .log_ind_tbl_size = 0,
2206                         .ind_tbl = &fdq->wq,
2207                         .comp_mask = 0,
2208                  });
2209         if (!fdq->ind_table) {
2210                 DRV_LOG(WARNING,
2211                         "port %u cannot allocate indirection table for drop"
2212                         " queue",
2213                         dev->data->port_id);
2214                 rte_errno = errno;
2215                 goto error;
2216         }
2217         fdq->qp = mlx5_glue->create_qp_ex
2218                 (priv->ctx,
2219                  &(struct ibv_qp_init_attr_ex){
2220                         .qp_type = IBV_QPT_RAW_PACKET,
2221                         .comp_mask =
2222                                 IBV_QP_INIT_ATTR_PD |
2223                                 IBV_QP_INIT_ATTR_IND_TABLE |
2224                                 IBV_QP_INIT_ATTR_RX_HASH,
2225                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2226                                 .rx_hash_function =
2227                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2228                                 .rx_hash_key_len = rss_hash_default_key_len,
2229                                 .rx_hash_key = rss_hash_default_key,
2230                                 .rx_hash_fields_mask = 0,
2231                                 },
2232                         .rwq_ind_tbl = fdq->ind_table,
2233                         .pd = priv->pd
2234                  });
2235         if (!fdq->qp) {
2236                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2237                         dev->data->port_id);
2238                 rte_errno = errno;
2239                 goto error;
2240         }
2241         priv->flow_drop_queue = fdq;
2242         return 0;
2243 error:
2244         if (fdq->qp)
2245                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2246         if (fdq->ind_table)
2247                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2248         if (fdq->wq)
2249                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2250         if (fdq->cq)
2251                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2252         if (fdq)
2253                 rte_free(fdq);
2254         priv->flow_drop_queue = NULL;
2255         return -rte_errno;
2256 }
2257
2258 /**
2259  * Delete drop queue.
2260  *
2261  * @param dev
2262  *   Pointer to Ethernet device.
2263  */
2264 void
2265 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2266 {
2267         struct priv *priv = dev->data->dev_private;
2268         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2269
2270         if (!fdq)
2271                 return;
2272         if (fdq->qp)
2273                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2274         if (fdq->ind_table)
2275                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2276         if (fdq->wq)
2277                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2278         if (fdq->cq)
2279                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2280         rte_free(fdq);
2281         priv->flow_drop_queue = NULL;
2282 }
2283
2284 /**
2285  * Remove all flows.
2286  *
2287  * @param dev
2288  *   Pointer to Ethernet device.
2289  * @param list
2290  *   Pointer to a TAILQ flow list.
2291  */
2292 void
2293 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2294 {
2295         struct priv *priv = dev->data->dev_private;
2296         struct rte_flow *flow;
2297
2298         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2299                 unsigned int i;
2300                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2301
2302                 if (flow->drop) {
2303                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2304                                 continue;
2305                         claim_zero(mlx5_glue->destroy_flow
2306                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2307                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2308                         DRV_LOG(DEBUG, "port %u flow %p removed",
2309                                 dev->data->port_id, (void *)flow);
2310                         /* Next flow. */
2311                         continue;
2312                 }
2313                 /* Verify the flow has not already been cleaned. */
2314                 for (i = 0; i != hash_rxq_init_n; ++i) {
2315                         if (!flow->frxq[i].ibv_flow)
2316                                 continue;
2317                         /*
2318                          * The indirection table may be needed to clear
2319                          * the mark flag on the Rx queues; keeping a
2320                          * reference here avoids looping over the queue
2321                          * attributes again.
2322                          */
2323                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2324                         break;
2325                 }
2326                 if (i == hash_rxq_init_n)
2327                         return;
2328                 if (flow->mark) {
2329                         assert(ind_tbl);
2330                         for (i = 0; i != ind_tbl->queues_n; ++i)
2331                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2332                 }
2333                 for (i = 0; i != hash_rxq_init_n; ++i) {
2334                         if (!flow->frxq[i].ibv_flow)
2335                                 continue;
2336                         claim_zero(mlx5_glue->destroy_flow
2337                                    (flow->frxq[i].ibv_flow));
2338                         flow->frxq[i].ibv_flow = NULL;
2339                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2340                         flow->frxq[i].hrxq = NULL;
2341                 }
2342                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2343                         (void *)flow);
2344         }
2345 }
2346
2347 /**
2348  * Add all flows.
2349  *
2350  * @param dev
2351  *   Pointer to Ethernet device.
2352  * @param list
2353  *   Pointer to a TAILQ flow list.
2354  *
2355  * @return
2356  *   0 on success, a negative errno value otherwise and rte_errno is set.
2357  */
2358 int
2359 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2360 {
2361         struct priv *priv = dev->data->dev_private;
2362         struct rte_flow *flow;
2363
2364         TAILQ_FOREACH(flow, list, next) {
2365                 unsigned int i;
2366
2367                 if (flow->drop) {
2368                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2369                                 mlx5_glue->create_flow
2370                                 (priv->flow_drop_queue->qp,
2371                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2372                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2373                                 DRV_LOG(DEBUG,
2374                                         "port %u flow %p cannot be applied",
2375                                         dev->data->port_id, (void *)flow);
2376                                 rte_errno = EINVAL;
2377                                 return -rte_errno;
2378                         }
2379                         DRV_LOG(DEBUG, "port %u flow %p applied",
2380                                 dev->data->port_id, (void *)flow);
2381                         /* Next flow. */
2382                         continue;
2383                 }
2384                 for (i = 0; i != hash_rxq_init_n; ++i) {
2385                         if (!flow->frxq[i].ibv_attr)
2386                                 continue;
2387                         flow->frxq[i].hrxq =
2388                                 mlx5_hrxq_get(dev, flow->rss_conf.key,
2389                                               flow->rss_conf.key_len,
2390                                               hash_rxq_init[i].hash_fields,
2391                                               flow->rss_conf.queue,
2392                                               flow->rss_conf.queue_num);
2393                         if (flow->frxq[i].hrxq)
2394                                 goto flow_create;
2395                         flow->frxq[i].hrxq =
2396                                 mlx5_hrxq_new(dev, flow->rss_conf.key,
2397                                               flow->rss_conf.key_len,
2398                                               hash_rxq_init[i].hash_fields,
2399                                               flow->rss_conf.queue,
2400                                               flow->rss_conf.queue_num);
2401                         if (!flow->frxq[i].hrxq) {
2402                                 DRV_LOG(DEBUG,
2403                                         "port %u flow %p cannot be applied",
2404                                         dev->data->port_id, (void *)flow);
2405                                 rte_errno = EINVAL;
2406                                 return -rte_errno;
2407                         }
2408 flow_create:
2409                         flow->frxq[i].ibv_flow =
2410                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2411                                                        flow->frxq[i].ibv_attr);
2412                         if (!flow->frxq[i].ibv_flow) {
2413                                 DRV_LOG(DEBUG,
2414                                         "port %u flow %p cannot be applied",
2415                                         dev->data->port_id, (void *)flow);
2416                                 rte_errno = EINVAL;
2417                                 return -rte_errno;
2418                         }
2419                         DRV_LOG(DEBUG, "port %u flow %p applied",
2420                                 dev->data->port_id, (void *)flow);
2421                 }
2422                 if (!flow->mark)
2423                         continue;
2424                 for (i = 0; i != flow->rss_conf.queue_num; ++i)
2425                         (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2426         }
2427         return 0;
2428 }
2429
2430 /**
2431  * Verify the flow list is empty.
2432  *
2433  * @param dev
2434  *   Pointer to Ethernet device.
2435  *
2436  * @return The number of flows not released.
2437  */
2438 int
2439 mlx5_flow_verify(struct rte_eth_dev *dev)
2440 {
2441         struct priv *priv = dev->data->dev_private;
2442         struct rte_flow *flow;
2443         int ret = 0;
2444
2445         TAILQ_FOREACH(flow, &priv->flows, next) {
2446                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2447                         dev->data->port_id, (void *)flow);
2448                 ++ret;
2449         }
2450         return ret;
2451 }
2452
2453 /**
2454  * Enable a control flow configured from the control plane.
2455  *
2456  * @param dev
2457  *   Pointer to Ethernet device.
2458  * @param eth_spec
2459  *   An Ethernet flow spec to apply.
2460  * @param eth_mask
2461  *   An Ethernet flow mask to apply.
2462  * @param vlan_spec
2463  *   A VLAN flow spec to apply.
2464  * @param vlan_mask
2465  *   A VLAN flow mask to apply.
2466  *
2467  * @return
2468  *   0 on success, a negative errno value otherwise and rte_errno is set.
2469  */
2470 int
2471 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2472                     struct rte_flow_item_eth *eth_spec,
2473                     struct rte_flow_item_eth *eth_mask,
2474                     struct rte_flow_item_vlan *vlan_spec,
2475                     struct rte_flow_item_vlan *vlan_mask)
2476 {
2477         struct priv *priv = dev->data->dev_private;
2478         const struct rte_flow_attr attr = {
2479                 .ingress = 1,
2480                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2481         };
2482         struct rte_flow_item items[] = {
2483                 {
2484                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2485                         .spec = eth_spec,
2486                         .last = NULL,
2487                         .mask = eth_mask,
2488                 },
2489                 {
2490                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2491                                 RTE_FLOW_ITEM_TYPE_END,
2492                         .spec = vlan_spec,
2493                         .last = NULL,
2494                         .mask = vlan_mask,
2495                 },
2496                 {
2497                         .type = RTE_FLOW_ITEM_TYPE_END,
2498                 },
2499         };
2500         uint16_t queue[priv->reta_idx_n];
2501         struct rte_flow_action_rss action_rss = {
2502                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2503                 .level = 0,
2504                 .types = priv->rss_conf.rss_hf,
2505                 .key_len = priv->rss_conf.rss_key_len,
2506                 .queue_num = priv->reta_idx_n,
2507                 .key = priv->rss_conf.rss_key,
2508                 .queue = queue,
2509         };
2510         struct rte_flow_action actions[] = {
2511                 {
2512                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2513                         .conf = &action_rss,
2514                 },
2515                 {
2516                         .type = RTE_FLOW_ACTION_TYPE_END,
2517                 },
2518         };
2519         struct rte_flow *flow;
2520         struct rte_flow_error error;
2521         unsigned int i;
2522
2523         if (!priv->reta_idx_n) {
2524                 rte_errno = EINVAL;
2525                 return -rte_errno;
2526         }
2527         for (i = 0; i != priv->reta_idx_n; ++i)
2528                 queue[i] = (*priv->reta_idx)[i];
2529         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2530                                      actions, &error);
2531         if (!flow)
2532                 return -rte_errno;
2533         return 0;
2534 }
2535
2536 /**
2537  * Enable a control flow configured from the control plane.
2538  *
2539  * @param dev
2540  *   Pointer to Ethernet device.
2541  * @param eth_spec
2542  *   An Ethernet flow spec to apply.
2543  * @param eth_mask
2544  *   An Ethernet flow mask to apply.
2545  *
2546  * @return
2547  *   0 on success, a negative errno value otherwise and rte_errno is set.
2548  */
2549 int
2550 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2551                struct rte_flow_item_eth *eth_spec,
2552                struct rte_flow_item_eth *eth_mask)
2553 {
2554         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2555 }
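
/*
 * Illustrative sketch (not part of the driver): the control path typically
 * calls this helper with a destination MAC and a full mask, e.g. to let
 * broadcast frames reach the configured Rx queues:
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 *   };
 *   int ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
 *
 *   if (ret)
 *           return ret;
 *
 * Passing the same structure as spec and mask matches that address exactly.
 */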
2556
2557 /**
2558  * Destroy a flow.
2559  *
2560  * @see rte_flow_destroy()
2561  * @see rte_flow_ops
2562  */
2563 int
2564 mlx5_flow_destroy(struct rte_eth_dev *dev,
2565                   struct rte_flow *flow,
2566                   struct rte_flow_error *error __rte_unused)
2567 {
2568         struct priv *priv = dev->data->dev_private;
2569
2570         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2571         return 0;
2572 }
2573
2574 /**
2575  * Destroy all flows.
2576  *
2577  * @see rte_flow_flush()
2578  * @see rte_flow_ops
2579  */
2580 int
2581 mlx5_flow_flush(struct rte_eth_dev *dev,
2582                 struct rte_flow_error *error __rte_unused)
2583 {
2584         struct priv *priv = dev->data->dev_private;
2585
2586         mlx5_flow_list_flush(dev, &priv->flows);
2587         return 0;
2588 }
2589
2590 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2591 /**
2592  * Query flow counter.
2593  *
2594  * @param cs
2595  *   The counter set to query.
2596  * @param[out] query_count
2597  *   Returned data from the counter (hits and bytes).
2598  *
2599  * @return
2600  *   0 on success, a negative errno value otherwise and rte_errno is set.
2601  */
2602 static int
2603 mlx5_flow_query_count(struct ibv_counter_set *cs,
2604                       struct mlx5_flow_counter_stats *counter_stats,
2605                       struct rte_flow_query_count *query_count,
2606                       struct rte_flow_error *error)
2607 {
2608         uint64_t counters[2];
2609         struct ibv_query_counter_set_attr query_cs_attr = {
2610                 .cs = cs,
2611                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2612         };
2613         struct ibv_counter_set_data query_out = {
2614                 .out = counters,
2615                 .outlen = 2 * sizeof(uint64_t),
2616         };
2617         int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2618
2619         if (err)
2620                 return rte_flow_error_set(error, err,
2621                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2622                                           NULL,
2623                                           "cannot read counter");
2624         query_count->hits_set = 1;
2625         query_count->bytes_set = 1;
2626         query_count->hits = counters[0] - counter_stats->hits;
2627         query_count->bytes = counters[1] - counter_stats->bytes;
2628         if (query_count->reset) {
2629                 counter_stats->hits = counters[0];
2630                 counter_stats->bytes = counters[1];
2631         }
2632         return 0;
2633 }
2634
2635 /**
2636  * Query a flow.
2637  *
2638  * @see rte_flow_query()
2639  * @see rte_flow_ops
2640  */
2641 int
2642 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2643                 struct rte_flow *flow,
2644                 enum rte_flow_action_type action __rte_unused,
2645                 void *data,
2646                 struct rte_flow_error *error)
2647 {
2648         if (flow->cs) {
2649                 int ret;
2650
2651                 ret = mlx5_flow_query_count(flow->cs,
2652                                             &flow->counter_stats,
2653                                             (struct rte_flow_query_count *)data,
2654                                             error);
2655                 if (ret)
2656                         return ret;
2657         } else {
2658                 return rte_flow_error_set(error, EINVAL,
2659                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2660                                           NULL,
2661                                           "no counter found for flow");
2662         }
2663         return 0;
2664 }
2665 #endif
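
/*
 * Illustrative sketch (not part of the driver): querying the counter of a
 * flow created with a COUNT action, from the application side. port_id and
 * flow are assumed to exist, and the rte_flow_query() prototype is assumed
 * to pass the COUNT action type as in the callback above:
 *
 *   struct rte_flow_query_count stats = { .reset = 1 };
 *   struct rte_flow_error err;
 *   int ret;
 *
 *   ret = rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                        &stats, &err);
 *   if (!ret && stats.hits_set)
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  stats.hits, stats.bytes);
 *
 * The .reset flag makes the driver rebase its stored baseline so the next
 * query reports deltas from this point.
 */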
2666
2667 /**
2668  * Isolated mode.
2669  *
2670  * @see rte_flow_isolate()
2671  * @see rte_flow_ops
2672  */
2673 int
2674 mlx5_flow_isolate(struct rte_eth_dev *dev,
2675                   int enable,
2676                   struct rte_flow_error *error)
2677 {
2678         struct priv *priv = dev->data->dev_private;
2679
2680         if (dev->data->dev_started) {
2681                 rte_flow_error_set(error, EBUSY,
2682                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2683                                    NULL,
2684                                    "port must be stopped first");
2685                 return -rte_errno;
2686         }
2687         priv->isolated = !!enable;
2688         if (enable)
2689                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2690         else
2691                 priv->dev->dev_ops = &mlx5_dev_ops;
2692         return 0;
2693 }
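
/*
 * A minimal usage sketch for the callback above ("port_id" is a
 * placeholder for an mlx5 port that has not been started yet): isolated
 * mode must be toggled before rte_eth_dev_start(), otherwise the EBUSY
 * check above rejects the request.
 *
 *     struct rte_flow_error err;
 *
 *     if (rte_flow_isolate(port_id, 1, &err) == 0)
 *         rte_eth_dev_start(port_id);
 */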
2694
2695 /**
2696  * Convert a flow director filter to a generic flow.
2697  *
2698  * @param dev
2699  *   Pointer to Ethernet device.
2700  * @param fdir_filter
2701  *   Flow director filter to add.
2702  * @param attributes
2703  *   Generic flow parameters structure.
2704  *
2705  * @return
2706  *   0 on success, a negative errno value otherwise and rte_errno is set.
2707  */
2708 static int
2709 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2710                          const struct rte_eth_fdir_filter *fdir_filter,
2711                          struct mlx5_fdir *attributes)
2712 {
2713         struct priv *priv = dev->data->dev_private;
2714         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2715         const struct rte_eth_fdir_masks *mask =
2716                 &dev->data->dev_conf.fdir_conf.mask;
2717
2718         /* Validate queue number. */
2719         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2720                 DRV_LOG(ERR, "port %u invalid queue number %d",
2721                         dev->data->port_id, fdir_filter->action.rx_queue);
2722                 rte_errno = EINVAL;
2723                 return -rte_errno;
2724         }
2725         attributes->attr.ingress = 1;
2726         attributes->items[0] = (struct rte_flow_item) {
2727                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2728                 .spec = &attributes->l2,
2729                 .mask = &attributes->l2_mask,
2730         };
2731         switch (fdir_filter->action.behavior) {
2732         case RTE_ETH_FDIR_ACCEPT:
2733                 attributes->actions[0] = (struct rte_flow_action){
2734                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2735                         .conf = &attributes->queue,
2736                 };
2737                 break;
2738         case RTE_ETH_FDIR_REJECT:
2739                 attributes->actions[0] = (struct rte_flow_action){
2740                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2741                 };
2742                 break;
2743         default:
2744                 DRV_LOG(ERR, "port %u invalid behavior %d",
2745                         dev->data->port_id,
2746                         fdir_filter->action.behavior);
2747                 rte_errno = ENOTSUP;
2748                 return -rte_errno;
2749         }
2750         attributes->queue.index = fdir_filter->action.rx_queue;
2751         /* Handle L3. */
2752         switch (fdir_filter->input.flow_type) {
2753         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2754         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2755         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2756                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2757                         .src_addr = input->flow.ip4_flow.src_ip,
2758                         .dst_addr = input->flow.ip4_flow.dst_ip,
2759                         .time_to_live = input->flow.ip4_flow.ttl,
2760                         .type_of_service = input->flow.ip4_flow.tos,
2761                         .next_proto_id = input->flow.ip4_flow.proto,
2762                 };
2763                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2764                         .src_addr = mask->ipv4_mask.src_ip,
2765                         .dst_addr = mask->ipv4_mask.dst_ip,
2766                         .time_to_live = mask->ipv4_mask.ttl,
2767                         .type_of_service = mask->ipv4_mask.tos,
2768                         .next_proto_id = mask->ipv4_mask.proto,
2769                 };
2770                 attributes->items[1] = (struct rte_flow_item){
2771                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2772                         .spec = &attributes->l3,
2773                         .mask = &attributes->l3_mask,
2774                 };
2775                 break;
2776         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2777         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2778         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2779                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2780                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2781                         .proto = input->flow.ipv6_flow.proto,
2782                 };
2783
2784                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2785                        input->flow.ipv6_flow.src_ip,
2786                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2787                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2788                        input->flow.ipv6_flow.dst_ip,
2789                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2790                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2791                        mask->ipv6_mask.src_ip,
2792                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2793                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2794                        mask->ipv6_mask.dst_ip,
2795                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2796                 attributes->items[1] = (struct rte_flow_item){
2797                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2798                         .spec = &attributes->l3,
2799                         .mask = &attributes->l3_mask,
2800                 };
2801                 break;
2802         default:
2803                 DRV_LOG(ERR, "port %u invalid flow type %d",
2804                         dev->data->port_id, fdir_filter->input.flow_type);
2805                 rte_errno = ENOTSUP;
2806                 return -rte_errno;
2807         }
2808         /* Handle L4. */
2809         switch (fdir_filter->input.flow_type) {
2810         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2811                 attributes->l4.udp.hdr = (struct udp_hdr){
2812                         .src_port = input->flow.udp4_flow.src_port,
2813                         .dst_port = input->flow.udp4_flow.dst_port,
2814                 };
2815                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2816                         .src_port = mask->src_port_mask,
2817                         .dst_port = mask->dst_port_mask,
2818                 };
2819                 attributes->items[2] = (struct rte_flow_item){
2820                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2821                         .spec = &attributes->l4,
2822                         .mask = &attributes->l4_mask,
2823                 };
2824                 break;
2825         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2826                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2827                         .src_port = input->flow.tcp4_flow.src_port,
2828                         .dst_port = input->flow.tcp4_flow.dst_port,
2829                 };
2830                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2831                         .src_port = mask->src_port_mask,
2832                         .dst_port = mask->dst_port_mask,
2833                 };
2834                 attributes->items[2] = (struct rte_flow_item){
2835                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2836                         .spec = &attributes->l4,
2837                         .mask = &attributes->l4_mask,
2838                 };
2839                 break;
2840         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2841                 attributes->l4.udp.hdr = (struct udp_hdr){
2842                         .src_port = input->flow.udp6_flow.src_port,
2843                         .dst_port = input->flow.udp6_flow.dst_port,
2844                 };
2845                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2846                         .src_port = mask->src_port_mask,
2847                         .dst_port = mask->dst_port_mask,
2848                 };
2849                 attributes->items[2] = (struct rte_flow_item){
2850                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2851                         .spec = &attributes->l4,
2852                         .mask = &attributes->l4_mask,
2853                 };
2854                 break;
2855         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2856                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2857                         .src_port = input->flow.tcp6_flow.src_port,
2858                         .dst_port = input->flow.tcp6_flow.dst_port,
2859                 };
2860                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2861                         .src_port = mask->src_port_mask,
2862                         .dst_port = mask->dst_port_mask,
2863                 };
2864                 attributes->items[2] = (struct rte_flow_item){
2865                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2866                         .spec = &attributes->l4,
2867                         .mask = &attributes->l4_mask,
2868                 };
2869                 break;
2870         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2871         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2872                 break;
2873         default:
2874                 DRV_LOG(ERR, "port %u invalid flow type %d",
2875                         dev->data->port_id, fdir_filter->input.flow_type);
2876                 rte_errno = ENOTSUP;
2877                 return -rte_errno;
2878         }
2879         return 0;
2880 }
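
/*
 * Illustration of the conversion above (a sketch, not a captured trace):
 * an RTE_ETH_FLOW_NONFRAG_IPV4_UDP perfect filter directed to RX queue 3
 * is rewritten into the generic flow
 *
 *     pattern: ETH / IPV4 spec,mask / UDP spec,mask / END
 *     actions: QUEUE index 3 / END
 *
 * (trailing zero-initialized entries of items[]/actions[] act as END).
 * The callers below then push this through the regular rte_flow parsing
 * and creation path.
 */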
2881
2882 /**
2883  * Add a new flow director filter and store it in the list.
2884  *
2885  * @param dev
2886  *   Pointer to Ethernet device.
2887  * @param fdir_filter
2888  *   Flow director filter to add.
2889  *
2890  * @return
2891  *   0 on success, a negative errno value otherwise and rte_errno is set.
2892  */
2893 static int
2894 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2895                      const struct rte_eth_fdir_filter *fdir_filter)
2896 {
2897         struct priv *priv = dev->data->dev_private;
2898         struct mlx5_fdir attributes = {
2899                 .attr.group = 0,
2900                 .l2_mask = {
2901                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2902                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2903                         .type = 0,
2904                 },
2905         };
2906         struct mlx5_flow_parse parser = {
2907                 .layer = HASH_RXQ_ETH,
2908         };
2909         struct rte_flow_error error;
2910         struct rte_flow *flow;
2911         int ret;
2912
2913         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2914         if (ret)
2915                 return ret;
2916         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2917                                 attributes.actions, &error, &parser);
2918         if (ret)
2919                 return ret;
2920         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2921                                      attributes.items, attributes.actions,
2922                                      &error);
2923         if (flow) {
2924                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2925                         (void *)flow);
2926                 return 0;
2927         }
2928         return -rte_errno;
2929 }
2930
2931 /**
2932  * Delete a specific filter.
2933  *
2934  * @param dev
2935  *   Pointer to Ethernet device.
2936  * @param fdir_filter
2937  *   Filter to be deleted.
2938  *
2939  * @return
2940  *   0 on success, a negative errno value otherwise and rte_errno is set.
2941  */
2942 static int
2943 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2944                         const struct rte_eth_fdir_filter *fdir_filter)
2945 {
2946         struct priv *priv = dev->data->dev_private;
2947         struct mlx5_fdir attributes = {
2948                 .attr.group = 0,
2949         };
2950         struct mlx5_flow_parse parser = {
2951                 .create = 1,
2952                 .layer = HASH_RXQ_ETH,
2953         };
2954         struct rte_flow_error error;
2955         struct rte_flow *flow;
2956         unsigned int i;
2957         int ret;
2958
2959         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2960         if (ret)
2961                 return ret;
2962         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2963                                 attributes.actions, &error, &parser);
2964         if (ret)
2965                 goto exit;
2966         /*
2967          * Special case for the drop action: its specification is only
2968          * appended when a flow is actually created, so it is missing here
2969          * and must be added before comparing against existing flows.
2970          */
2971         if (parser.drop) {
2972                 struct ibv_flow_spec_action_drop *drop;
2973
2974                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2975                                 parser.queue[HASH_RXQ_ETH].offset);
2976                 *drop = (struct ibv_flow_spec_action_drop){
2977                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2978                         .size = sizeof(struct ibv_flow_spec_action_drop),
2979                 };
2980                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2981         }
2982         TAILQ_FOREACH(flow, &priv->flows, next) {
2983                 struct ibv_flow_attr *attr;
2984                 struct ibv_spec_header *attr_h;
2985                 void *spec;
2986                 struct ibv_flow_attr *flow_attr;
2987                 struct ibv_spec_header *flow_h;
2988                 void *flow_spec;
2989                 unsigned int specs_n;
2990
2991                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2992                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2993                 /* Compare first the attributes. */
2994                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2995                         continue;
2996                 if (attr->num_of_specs == 0)
2997                         continue;
2998                 spec = (void *)((uintptr_t)attr +
2999                                 sizeof(struct ibv_flow_attr));
3000                 flow_spec = (void *)((uintptr_t)flow_attr +
3001                                      sizeof(struct ibv_flow_attr));
3002                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3003                 for (i = 0; i != specs_n; ++i) {
3004                         attr_h = spec;
3005                         flow_h = flow_spec;
3006                         if (memcmp(spec, flow_spec,
3007                                    RTE_MIN(attr_h->size, flow_h->size)))
3008                                 goto wrong_flow;
3009                         spec = (void *)((uintptr_t)spec + attr_h->size);
3010                         flow_spec = (void *)((uintptr_t)flow_spec +
3011                                              flow_h->size);
3012                 }
3013                 /* At this point, the flow matches. */
3014                 break;
3015 wrong_flow:
3016                 /* The flow does not match. */
3017                 continue;
3018         }
3019         ret = rte_errno; /* Save rte_errno before cleanup. */
3020         if (flow)
3021                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3022 exit:
3023         for (i = 0; i != hash_rxq_init_n; ++i) {
3024                 if (parser.queue[i].ibv_attr)
3025                         rte_free(parser.queue[i].ibv_attr);
3026         }
3027         rte_errno = ret; /* Restore rte_errno. */
3028         return -rte_errno;
3029 }
3030
3031 /**
3032  * Update a specific filter.
3033  *
3034  * @param dev
3035  *   Pointer to Ethernet device.
3036  * @param fdir_filter
3037  *   Filter to be updated.
3038  *
3039  * @return
3040  *   0 on success, a negative errno value otherwise and rte_errno is set.
3041  */
3042 static int
3043 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3044                         const struct rte_eth_fdir_filter *fdir_filter)
3045 {
3046         int ret;
3047
3048         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3049         if (ret)
3050                 return ret;
3051         return mlx5_fdir_filter_add(dev, fdir_filter);
3052 }
3053
3054 /**
3055  * Flush all filters.
3056  *
3057  * @param dev
3058  *   Pointer to Ethernet device.
3059  */
3060 static void
3061 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3062 {
3063         struct priv *priv = dev->data->dev_private;
3064
3065         mlx5_flow_list_flush(dev, &priv->flows);
3066 }
3067
3068 /**
3069  * Get flow director information.
3070  *
3071  * @param dev
3072  *   Pointer to Ethernet device.
3073  * @param[out] fdir_info
3074  *   Resulting flow director information.
3075  */
3076 static void
3077 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3078 {
3079         struct priv *priv = dev->data->dev_private;
3080         struct rte_eth_fdir_masks *mask =
3081                 &priv->dev->data->dev_conf.fdir_conf.mask;
3082
3083         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3084         fdir_info->guarant_spc = 0;
3085         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3086         fdir_info->max_flexpayload = 0;
3087         fdir_info->flow_types_mask[0] = 0;
3088         fdir_info->flex_payload_unit = 0;
3089         fdir_info->max_flex_payload_segment_num = 0;
3090         fdir_info->flex_payload_limit = 0;
3091         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3092 }
3093
3094 /**
3095  * Deal with flow director operations.
3096  *
3097  * @param dev
3098  *   Pointer to Ethernet device.
3099  * @param filter_op
3100  *   Operation to perform.
3101  * @param arg
3102  *   Pointer to operation-specific structure.
3103  *
3104  * @return
3105  *   0 on success, a negative errno value otherwise and rte_errno is set.
3106  */
3107 static int
3108 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3109                     void *arg)
3110 {
3111         struct priv *priv = dev->data->dev_private;
3112         enum rte_fdir_mode fdir_mode =
3113                 priv->dev->data->dev_conf.fdir_conf.mode;
3114
3115         if (filter_op == RTE_ETH_FILTER_NOP)
3116                 return 0;
3117         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3118             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3119                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3120                         dev->data->port_id, fdir_mode);
3121                 rte_errno = EINVAL;
3122                 return -rte_errno;
3123         }
3124         switch (filter_op) {
3125         case RTE_ETH_FILTER_ADD:
3126                 return mlx5_fdir_filter_add(dev, arg);
3127         case RTE_ETH_FILTER_UPDATE:
3128                 return mlx5_fdir_filter_update(dev, arg);
3129         case RTE_ETH_FILTER_DELETE:
3130                 return mlx5_fdir_filter_delete(dev, arg);
3131         case RTE_ETH_FILTER_FLUSH:
3132                 mlx5_fdir_filter_flush(dev);
3133                 break;
3134         case RTE_ETH_FILTER_INFO:
3135                 mlx5_fdir_info_get(dev, arg);
3136                 break;
3137         default:
3138                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3139                         dev->data->port_id, filter_op);
3140                 rte_errno = EINVAL;
3141                 return -rte_errno;
3142         }
3143         return 0;
3144 }
3145
3146 /**
3147  * Manage filter operations.
3148  *
3149  * @param dev
3150  *   Pointer to Ethernet device structure.
3151  * @param filter_type
3152  *   Filter type.
3153  * @param filter_op
3154  *   Operation to perform.
3155  * @param arg
3156  *   Pointer to operation-specific structure.
3157  *
3158  * @return
3159  *   0 on success, a negative errno value otherwise and rte_errno is set.
3160  */
3161 int
3162 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3163                      enum rte_filter_type filter_type,
3164                      enum rte_filter_op filter_op,
3165                      void *arg)
3166 {
3167         switch (filter_type) {
3168         case RTE_ETH_FILTER_GENERIC:
3169                 if (filter_op != RTE_ETH_FILTER_GET) {
3170                         rte_errno = EINVAL;
3171                         return -rte_errno;
3172                 }
3173                 *(const void **)arg = &mlx5_flow_ops;
3174                 return 0;
3175         case RTE_ETH_FILTER_FDIR:
3176                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3177         default:
3178                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3179                         dev->data->port_id, filter_type);
3180                 rte_errno = ENOTSUP;
3181                 return -rte_errno;
3182         }
3183         return 0;
3184 }
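
/*
 * A minimal sketch of how this entry point is reached for generic flows
 * ("port_id" is a placeholder): the rte_flow layer performs the
 * equivalent of the call below when looking up driver operations, and an
 * application may issue it directly as well:
 *
 *     const struct rte_flow_ops *ops;
 *
 *     rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                             RTE_ETH_FILTER_GET, &ops);
 *
 * This lands in the RTE_ETH_FILTER_GENERIC case above and returns
 * &mlx5_flow_ops.
 */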
3185
3186 /**
3187  * Detect number of Verbs flow priorities supported.
3188  *
3189  * @param dev
3190  *   Pointer to Ethernet device.
3191  *
3192  * @return
3193  *   Number of supported Verbs flow priorities.
3194  */
3195 unsigned int
3196 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3197 {
3198         struct priv *priv = dev->data->dev_private;
3199         unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3200         struct {
3201                 struct ibv_flow_attr attr;
3202                 struct ibv_flow_spec_eth eth;
3203                 struct ibv_flow_spec_action_drop drop;
3204         } flow_attr = {
3205                 .attr = {
3206                         .num_of_specs = 2,
3207                 },
3208                 .eth = {
3209                         .type = IBV_FLOW_SPEC_ETH,
3210                         .size = sizeof(struct ibv_flow_spec_eth),
3211                 },
3212                 .drop = {
3213                         .size = sizeof(struct ibv_flow_spec_action_drop),
3214                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3215                 },
3216         };
3217         struct ibv_flow *flow;
3218
3219         do {
3220                 flow_attr.attr.priority = verb_priorities - 1;
3221                 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3222                                               &flow_attr.attr);
3223                 if (flow) {
3224                         claim_zero(mlx5_glue->destroy_flow(flow));
3225                         /* Try more priorities. */
3226                         verb_priorities *= 2;
3227                 } else {
3228                         /* Failed, fall back to the last value that worked. */
3229                         verb_priorities /= 2;
3230                         break;
3231                 }
3232         } while (1);
3233         DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3234                 " user flow priorities: %d",
3235                 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3236         return verb_priorities;
3237 }
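
/*
 * Probing sketch for the function above, assuming MLX5_VERBS_FLOW_PRIO_8
 * equals 8 as its name suggests: a dummy Ethernet+drop flow is created at
 * priority verb_priorities - 1, so first at 7.  While creation succeeds
 * the guess is doubled (15, 31, ...); the first failure halves it back to
 * the last working value.  On hardware exposing 16 priorities the probe
 * runs 7 (ok) -> 15 (ok) -> 31 (fail) and the function returns 16.
 */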