net/mlx5: fix flow director drop rule deletion crash
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Flow priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 1

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6
#define MLX5_GRE 47

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
        int dummy;
};
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/** Structure given to the conversion functions. */
struct mlx5_flow_data {
        struct rte_eth_dev *dev; /**< Ethernet device. */
        struct mlx5_flow_parse *parser; /**< Parser context. */
        struct rte_flow_error *error; /**< Error context. */
};

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
                           const void *default_mask,
                           struct mlx5_flow_data *data);

static int
mlx5_flow_create_gre(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
        HASH_RXQ_TUNNEL,
};

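/*
 * Note: the enum order is significant. L4 types precede their L3
 * counterparts and HASH_RXQ_ETH comes last; mlx5_flow_convert_rss()
 * and mlx5_flow_convert_finalise() below rely on this ordering when
 * trimming or completing the per-type Verbs specifications.
 */
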
/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

#define IS_TUNNEL(type) ( \
        (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
        (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
        (type) == RTE_FLOW_ITEM_TYPE_GRE)

const uint32_t flow_ptype[] = {
        [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
        [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
        [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
};

#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
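/*
 * RTE_PTYPE_TUNNEL_MASK is 0x0000f000: tunnel types occupy bits 12-15
 * of the packet type, so shifting right by 12 yields a small index
 * suitable for ptype_ext[] below.
 */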

const uint32_t ptype_ext[] = {
        [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
                                              RTE_PTYPE_L4_UDP,
        [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
                                                  RTE_PTYPE_L4_UDP,
        [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
};

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, a negative errno value otherwise and rte_errno is
         *   set.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       struct mlx5_flow_data *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN,
                               RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
                               RTE_FLOW_ITEM_TYPE_GRE),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                        .inner_type = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP,
                               RTE_FLOW_ITEM_TYPE_GRE),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP,
                               RTE_FLOW_ITEM_TYPE_GRE),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
                               RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_GRE] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_gre){
                        .protocol = -1,
                },
                .default_mask = &rte_flow_item_gre_mask,
                .mask_sz = sizeof(struct rte_flow_item_gre),
                .convert = mlx5_flow_create_gre,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
                               RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan_gpe){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_gpe_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
                .convert = mlx5_flow_create_vxlan_gpe,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};

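/*
 * Illustrative note: the graph above is walked from
 * RTE_FLOW_ITEM_TYPE_END. A pattern such as
 * eth / ipv4 / udp / vxlan / eth / ipv4 is accepted because each item
 * appears in the .items list of its predecessor, while e.g. eth / udp
 * is rejected by mlx5_flow_convert_items_validate() since UDP may not
 * directly follow ETH.
 */
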
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
        uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
                uint64_t hash_fields; /**< Verbs hash fields. */
        } queue[RTE_DIM(hash_rxq_init)];
};

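/*
 * Note: a single rte_flow can expand into up to RTE_DIM(hash_rxq_init)
 * Verbs flows, one per hash Rx queue type, so that RSS can spread
 * traffic over several hash configurations. Entries of queue[] left
 * unused are freed by mlx5_flow_convert_rss().
 */
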
static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action_queue queue;
};
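
/*
 * Note (sketch of intent): the flow director code later in this file
 * fills the structure above so that attr/items/actions reference the
 * embedded l2/l3/l4 specs and masks, letting a legacy FDIR request be
 * handled by the generic rte_flow path.
 */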

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};
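
/*
 * Every Verbs flow specification begins with this type/size header,
 * which lets generic code walk a chain of heterogeneous specs inside
 * an ibv_flow_attr without knowing their concrete types.
 */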

/**
 * Check whether an item is fully supported by the NIC matching
 * capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        unsigned int i;
        const uint8_t *spec = item->spec;
        const uint8_t *last = item->last;
        const uint8_t *m = item->mask ? item->mask : mask;

        if (!spec && (item->mask || last))
                goto error;
        if (!spec)
                return 0;
        /*
         * Single-pass check to make sure that:
         * - item->mask is supported, no bits are set outside mask.
         * - Both masked item->spec and item->last are equal (no range
         *   supported).
         */
        for (i = 0; i < size; i++) {
                if (!m[i])
                        continue;
                if ((m[i] | mask[i]) != mask[i])
                        goto error;
                if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
                        goto error;
        }
        return 0;
error:
        rte_errno = ENOTSUP;
        return -rte_errno;
}

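/*
 * Example (illustrative only): for the IPV4 item the supported mask
 * covers addresses, ToS and protocol; an item->mask matching on
 * hdr.time_to_live, or an item->last describing a range (masked spec
 * != masked last), is rejected above with ENOTSUP.
 */
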
/**
 * Validate flow rule attributes.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
                             struct rte_flow_error *error)
{
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (attr->transfer) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                                   NULL,
                                   "transfer is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract the requested actions into the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        enum { FATE = 1, MARK = 2, COUNT = 4, };
        uint32_t overlap = 0;
        struct priv *priv = dev->data->dev_private;

        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        if (overlap & FATE)
                                goto exit_action_overlap;
                        overlap |= FATE;
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;

                        if (overlap & FATE)
                                goto exit_action_overlap;
                        overlap |= FATE;
                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        parser->queues[0] = queue->index;
                        parser->rss_conf = (struct rte_flow_action_rss){
                                .queue_num = 1,
                                .queue = parser->queues,
                        };
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        const uint8_t *rss_key;
                        uint32_t rss_key_len;
                        uint16_t n;

                        if (overlap & FATE)
                                goto exit_action_overlap;
                        overlap |= FATE;
                        if (rss->func &&
                            rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "the only supported RSS hash"
                                                   " function is Toeplitz");
                                return -rte_errno;
                        }
#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
                        if (rss->level > 1) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "a nonzero RSS encapsulation"
                                                   " level is not supported");
                                return -rte_errno;
                        }
#endif
                        if (rss->level > 2) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "RSS encapsulation level"
                                                   " > 2 is not supported");
                                return -rte_errno;
                        }
                        if (rss->types & MLX5_RSS_HF_MASK) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "unsupported RSS type"
                                                   " requested");
                                return -rte_errno;
                        }
                        if (rss->key_len) {
                                rss_key_len = rss->key_len;
                                rss_key = rss->key;
                        } else {
                                rss_key_len = rss_hash_default_key_len;
                                rss_key = rss_hash_default_key;
                        }
                        if (rss_key_len != RTE_DIM(parser->rss_key)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "RSS hash key must be"
                                                   " exactly 40 bytes long");
                                return -rte_errno;
                        }
                        if (!rss->queue_num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (rss->queue_num > RTE_DIM(parser->queues)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "too many queues for RSS"
                                                   " context");
                                return -rte_errno;
                        }
                        for (n = 0; n < rss->queue_num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        parser->rss_conf = (struct rte_flow_action_rss){
                                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                                .level = rss->level,
                                .types = rss->types,
                                .key_len = rss_key_len,
                                .queue_num = rss->queue_num,
                                .key = memcpy(parser->rss_key, rss_key,
                                              sizeof(*rss_key) * rss_key_len),
                                .queue = memcpy(parser->queues, rss->queue,
                                                sizeof(*rss->queue) *
                                                rss->queue_num),
                        };
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (overlap & MARK)
                                goto exit_action_overlap;
                        overlap |= MARK;
                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        if (overlap & MARK)
                                goto exit_action_overlap;
                        overlap |= MARK;
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        if (overlap & COUNT)
                                goto exit_action_overlap;
                        overlap |= COUNT;
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        /* When fate is unknown, drop traffic. */
        if (!(overlap & FATE))
                parser->drop = 1;
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->rss_conf.queue_num && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
exit_action_overlap:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "overlapping actions are not supported");
        return -rte_errno;
}

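/*
 * Resulting parser state, for reference: a QUEUE action is stored as a
 * single-queue RSS configuration, a missing fate action defaults to
 * drop, and MARK is silently discarded on drop flows since no packet
 * is ever delivered.
 */
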
/**
 * Validate items.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        struct priv *priv = dev->data->dev_private;
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret = 0;

        /* Initialise the offsets to start after verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token) {
                        ret = -ENOTSUP;
                        goto exit_item_not_supported;
                }
                cur_item = token;
                ret = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (ret)
                        goto exit_item_not_supported;
                if (IS_TUNNEL(items->type)) {
                        if (parser->tunnel) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "Cannot recognize multiple"
                                                   " tunnel encapsulations.");
                                return -rte_errno;
                        }
                        if (!priv->config.tunnel_en &&
                            parser->rss_conf.level > 1) {
                                rte_flow_error_set(error, ENOTSUP,
                                        RTE_FLOW_ERROR_TYPE_ITEM,
                                        items,
                                        "RSS on tunnel is not supported");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                        parser->tunnel = flow_ptype[items->type];
                }
                if (parser->drop) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
                                  items, "item not supported");
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes");
                return NULL;
        }
        return ibv_attr;
}

/**
 * Give inner packet matching a higher priority than non-inner (outer)
 * matching.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct rte_eth_dev *dev,
                          struct mlx5_flow_parse *parser,
                          const struct rte_flow_attr *attr)
{
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
        uint16_t priority;

        /*                      8 priorities    >= 16 priorities
         * Control flow:        4-7             8-15
         * User normal flow:    1-3             4-7
         * User tunnel flow:    0-2             0-3
         */
        priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
        if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
                priority /= 2;
        /*
         * Demote non-tunnel flows by one Verbs priority level when only
         * 8 Verbs priorities are supported, by four levels otherwise.
         */
        if (!parser->inner) {
                if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
                        priority += 1;
                else
                        priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
                                hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                return;
        }
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                parser->queue[i].ibv_attr->priority = priority +
                                hash_rxq_init[i].flow_priority;
        }
}
1023
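/*
 * Worked example (assuming MLX5_VERBS_FLOW_PRIO_8 is 8): on a device
 * with 16 Verbs priorities, a non-tunnel flow with attr->priority == 0
 * starts at 0 and is demoted by 4; its TCPv4 spec (flow_priority 0)
 * lands on priority 4 and its ETH spec (flow_priority 2) on 6, within
 * the "user normal flow: 4-7" row of the table above.
 */
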
/**
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
        unsigned int i;
        uint32_t inner = parser->inner;

        /* Don't create extra flows for outer RSS. */
        if (parser->tunnel && parser->rss_conf.level < 2)
                return;
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                        struct ibv_flow_spec_eth eth;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH ||
                    parser->layer == HASH_RXQ_TUNNEL) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = inner | IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = inner | ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}

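/*
 * Example: a pattern matching only ETH leaves the L4 hash Rx queue
 * types short of L3/L4 specs; the loop above appends empty IPv4/IPv6
 * and TCP/UDP specs so that every remaining hash Rx queue type carries
 * a complete specification chain.
 */
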
/**
 * Update flows according to pattern and RSS hash fields.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
{
        unsigned int i;
        enum hash_rxq_type start;
        enum hash_rxq_type layer;
        int outer = parser->tunnel && parser->rss_conf.level < 2;
        uint64_t rss = parser->rss_conf.types;

        /* Default to outer RSS. */
        if (!parser->rss_conf.level)
                parser->rss_conf.level = 1;
        layer = outer ? parser->out_layer : parser->layer;
        if (layer == HASH_RXQ_TUNNEL)
                layer = HASH_RXQ_ETH;
        if (outer) {
                /* Only one hash type for outer RSS. */
                if (rss && layer == HASH_RXQ_ETH) {
                        start = HASH_RXQ_TCPV4;
                } else if (rss && layer != HASH_RXQ_ETH &&
                           !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
                        /* If the RSS types do not match the L4 pattern,
                         * try L3 RSS. */
                        if (layer < HASH_RXQ_IPV4)
                                layer = HASH_RXQ_IPV4;
                        else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
                                layer = HASH_RXQ_IPV6;
                        start = layer;
                } else {
                        start = layer;
                }
                /* Scan first valid hash type. */
                for (i = start; rss && i <= layer; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (hash_rxq_init[i].dpdk_rss_hf & rss)
                                break;
                }
                if (rss && i <= layer)
                        parser->queue[layer].hash_fields =
                                        hash_rxq_init[i].hash_fields;
                /* Trim unused hash types. */
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr && i != layer) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        } else {
                /* Expand for inner or normal RSS. */
                if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
                        start = HASH_RXQ_TCPV4;
                else if (rss && layer == HASH_RXQ_IPV6)
                        start = HASH_RXQ_TCPV6;
                else
                        start = layer;
                /* For L4 pattern, try L3 RSS if no L4 RSS. */
                /* Trim unused hash types. */
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (i < start || i > layer) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                                continue;
                        }
                        if (!rss)
                                continue;
                        if (hash_rxq_init[i].dpdk_rss_hf & rss) {
                                parser->queue[i].hash_fields =
                                                hash_rxq_init[i].hash_fields;
                        } else if (i != layer) {
                                /* Remove unused RSS expansion. */
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        } else if (layer < HASH_RXQ_IPV4 &&
                                   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
                                    rss)) {
                                /* Allow IPv4 RSS on L4 pattern. */
                                parser->queue[i].hash_fields =
                                        hash_rxq_init[HASH_RXQ_IPV4]
                                                .hash_fields;
                        } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
                                   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
                                    rss)) {
                                /* Allow IPv6 RSS on L4 pattern. */
                                parser->queue[i].hash_fields =
                                        hash_rxq_init[HASH_RXQ_IPV6]
                                                .hash_fields;
                        }
                }
        }
        return 0;
}

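/*
 * Example (illustrative only): for an eth / ipv4 / tcp pattern with
 * rss->types == ETH_RSS_IP, only the HASH_RXQ_TCPV4 attribute is kept
 * and, since ETH_RSS_IP carries no TCP bit, the IPv4 fallback branch
 * above selects the IPv4 source/destination hash fields instead.
 */
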
1205 /**
1206  * Validate and convert a flow supported by the NIC.
1207  *
1208  * @param dev
1209  *   Pointer to Ethernet device.
1210  * @param[in] attr
1211  *   Flow rule attributes.
1212  * @param[in] pattern
1213  *   Pattern specification (list terminated by the END pattern item).
1214  * @param[in] actions
1215  *   Associated actions (list terminated by the END action).
1216  * @param[out] error
1217  *   Perform verbose error reporting if not NULL.
1218  * @param[in, out] parser
1219  *   Internal parser structure.
1220  *
1221  * @return
1222  *   0 on success, a negative errno value otherwise and rte_errno is set.
1223  */
1224 static int
1225 mlx5_flow_convert(struct rte_eth_dev *dev,
1226                   const struct rte_flow_attr *attr,
1227                   const struct rte_flow_item items[],
1228                   const struct rte_flow_action actions[],
1229                   struct rte_flow_error *error,
1230                   struct mlx5_flow_parse *parser)
1231 {
1232         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1233         unsigned int i;
1234         int ret;
1235
1236         /* First step. Validate the attributes, items and actions. */
1237         *parser = (struct mlx5_flow_parse){
1238                 .create = parser->create,
1239                 .layer = HASH_RXQ_ETH,
1240                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1241         };
1242         ret = mlx5_flow_convert_attributes(attr, error);
1243         if (ret)
1244                 return ret;
1245         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1246         if (ret)
1247                 return ret;
1248         ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1249         if (ret)
1250                 return ret;
1251         mlx5_flow_convert_finalise(parser);
1252         /*
1253          * Second step.
1254          * Allocate the memory space to store verbs specifications.
1255          */
1256         if (parser->drop) {
1257                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1258
1259                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1260                         mlx5_flow_convert_allocate(offset, error);
1261                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1262                         goto exit_enomem;
1263                 parser->queue[HASH_RXQ_ETH].offset =
1264                         sizeof(struct ibv_flow_attr);
1265         } else {
1266                 for (i = 0; i != hash_rxq_init_n; ++i) {
1267                         unsigned int offset;
1268
1269                         offset = parser->queue[i].offset;
1270                         parser->queue[i].ibv_attr =
1271                                 mlx5_flow_convert_allocate(offset, error);
1272                         if (!parser->queue[i].ibv_attr)
1273                                 goto exit_enomem;
1274                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1275                 }
1276         }
1277         /* Third step. Conversion parse, fill the specifications. */
1278         parser->inner = 0;
1279         parser->tunnel = 0;
1280         parser->layer = HASH_RXQ_ETH;
1281         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1282                 struct mlx5_flow_data data = {
1283                         .dev = dev,
1284                         .parser = parser,
1285                         .error = error,
1286                 };
1287
1288                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1289                         continue;
1290                 cur_item = &mlx5_flow_items[items->type];
1291                 ret = cur_item->convert(items,
1292                                         (cur_item->default_mask ?
1293                                          cur_item->default_mask :
1294                                          cur_item->mask),
1295                                          &data);
1296                 if (ret)
1297                         goto exit_free;
1298         }
1299         if (!parser->drop) {
1300                 /* RSS check, remove unused hash types. */
1301                 ret = mlx5_flow_convert_rss(parser);
1302                 if (ret)
1303                         goto exit_free;
1304                 /* Complete missing specification. */
1305                 mlx5_flow_convert_finalise(parser);
1306         }
1307         mlx5_flow_update_priority(dev, parser, attr);
1308         if (parser->mark)
1309                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1310         if (parser->count && parser->create) {
1311                 mlx5_flow_create_count(dev, parser);
1312                 if (!parser->cs)
1313                         goto exit_count_error;
1314         }
1315 exit_free:
1316         /* When only validation is requested, all resources must be released. */
1317         if (!parser->create) {
1318                 for (i = 0; i != hash_rxq_init_n; ++i) {
1319                         if (parser->queue[i].ibv_attr) {
1320                                 rte_free(parser->queue[i].ibv_attr);
1321                                 parser->queue[i].ibv_attr = NULL;
1322                         }
1323                 }
1324         }
1325         return ret;
1326 exit_enomem:
1327         for (i = 0; i != hash_rxq_init_n; ++i) {
1328                 if (parser->queue[i].ibv_attr) {
1329                         rte_free(parser->queue[i].ibv_attr);
1330                         parser->queue[i].ibv_attr = NULL;
1331                 }
1332         }
1333         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1334                            NULL, "cannot allocate verbs spec attributes");
1335         return -rte_errno;
1336 exit_count_error:
1337         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1338                            NULL, "cannot create counter");
1339         return -rte_errno;
1340 }
1341
1342 /**
1343  * Copy the specification created into the flow.
1344  *
1345  * @param parser
1346  *   Internal parser structure.
1347  * @param src
1348  *   Create specification.
1349  * @param size
1350  *   Size in bytes of the specification to copy.
1351  */
1352 static void
1353 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1354                       unsigned int size)
1355 {
1356         unsigned int i;
1357         void *dst;
1358
1359         for (i = 0; i != hash_rxq_init_n; ++i) {
1360                 if (!parser->queue[i].ibv_attr)
1361                         continue;
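                /* Append the spec at this queue's current write offset. */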
1362                 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1363                                 parser->queue[i].offset);
1364                 memcpy(dst, src, size);
1365                 ++parser->queue[i].ibv_attr->num_of_specs;
1366                 parser->queue[i].offset += size;
1367         }
1368 }
1369
1370 /**
1371  * Convert Ethernet item to Verbs specification.
1372  *
1373  * @param[in] item
1374  *   Item specification.
1375  * @param[in] default_mask
1376  *   Default bit-masks to use when item->mask is not provided.
1377  * @param[in, out] data
1378  *   User structure.
1379  *
1380  * @return
1381  *   0 on success, a negative errno value otherwise and rte_errno is set.
1382  */
1383 static int
1384 mlx5_flow_create_eth(const struct rte_flow_item *item,
1385                      const void *default_mask,
1386                      struct mlx5_flow_data *data)
1387 {
1388         const struct rte_flow_item_eth *spec = item->spec;
1389         const struct rte_flow_item_eth *mask = item->mask;
1390         struct mlx5_flow_parse *parser = data->parser;
1391         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1392         struct ibv_flow_spec_eth eth = {
1393                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1394                 .size = eth_size,
1395         };
1396
1397         parser->layer = HASH_RXQ_ETH;
1398         if (spec) {
1399                 unsigned int i;
1400
1401                 if (!mask)
1402                         mask = default_mask;
1403                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1404                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1405                 eth.val.ether_type = spec->type;
1406                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1407                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1408                 eth.mask.ether_type = mask->type;
1409                 /* Remove unwanted bits from values. */
1410                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1411                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1412                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1413                 }
1414                 eth.val.ether_type &= eth.mask.ether_type;
1415         }
1416         mlx5_flow_create_copy(parser, &eth, eth_size);
1417         return 0;
1418 }
1419
1420 /**
1421  * Convert VLAN item to Verbs specification.
1422  *
1423  * @param[in] item
1424  *   Item specification.
1425  * @param[in] default_mask
1426  *   Default bit-masks to use when item->mask is not provided.
1427  * @param[in, out] data
1428  *   User structure.
1429  *
1430  * @return
1431  *   0 on success, a negative errno value otherwise and rte_errno is set.
1432  */
1433 static int
1434 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1435                       const void *default_mask,
1436                       struct mlx5_flow_data *data)
1437 {
1438         const struct rte_flow_item_vlan *spec = item->spec;
1439         const struct rte_flow_item_vlan *mask = item->mask;
1440         struct mlx5_flow_parse *parser = data->parser;
1441         struct ibv_flow_spec_eth *eth;
1442         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1443         const char *msg = "VLAN cannot be empty";
1444
1445         if (spec) {
1446                 unsigned int i;
1447
1448                 if (!mask)
1449                         mask = default_mask;
1450                 for (i = 0; i != hash_rxq_init_n; ++i) {
1451                         if (!parser->queue[i].ibv_attr)
1452                                 continue;
1453
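                        /*
                         * The VLAN TCI is matched inside the Ethernet spec
                         * written by the preceding ETH item; step back by
                         * eth_size to reach it.
                         */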
1454                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1455                                        parser->queue[i].offset - eth_size);
1456                         eth->val.vlan_tag = spec->tci;
1457                         eth->mask.vlan_tag = mask->tci;
1458                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1459                         /*
1460                          * From the Verbs perspective, an empty VLAN match
1461                          * is equivalent to a packet without a VLAN layer.
1462                          */
1463                         if (!eth->mask.vlan_tag)
1464                                 goto error;
1465                         /* Outer TPID cannot be matched. */
1466                         if (eth->mask.ether_type) {
1467                                 msg = "VLAN TPID matching is not supported";
1468                                 goto error;
1469                         }
1470                         eth->val.ether_type = spec->inner_type;
1471                         eth->mask.ether_type = mask->inner_type;
1472                         eth->val.ether_type &= eth->mask.ether_type;
1473                 }
1474                 return 0;
1475         }
1476 error:
1477         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1478                                   item, msg);
1479 }
1480
1481 /**
1482  * Convert IPv4 item to Verbs specification.
1483  *
1484  * @param[in] item
1485  *   Item specification.
1486  * @param[in] default_mask
1487  *   Default bit-masks to use when item->mask is not provided.
1488  * @param[in, out] data
1489  *   User structure.
1490  *
1491  * @return
1492  *   0 on success, a negative errno value otherwise and rte_errno is set.
1493  */
1494 static int
1495 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1496                       const void *default_mask,
1497                       struct mlx5_flow_data *data)
1498 {
1499         struct priv *priv = data->dev->data->dev_private;
1500         const struct rte_flow_item_ipv4 *spec = item->spec;
1501         const struct rte_flow_item_ipv4 *mask = item->mask;
1502         struct mlx5_flow_parse *parser = data->parser;
1503         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1504         struct ibv_flow_spec_ipv4_ext ipv4 = {
1505                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1506                 .size = ipv4_size,
1507         };
1508
1509         if (parser->layer == HASH_RXQ_TUNNEL &&
1510             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1511             !priv->config.l3_vxlan_en)
1512                 return rte_flow_error_set(data->error, EINVAL,
1513                                           RTE_FLOW_ERROR_TYPE_ITEM,
1514                                           item,
1515                                           "L3 VXLAN not enabled by device"
1516                                           " parameter and/or not configured"
1517                                           " in firmware");
1518         parser->layer = HASH_RXQ_IPV4;
1519         if (spec) {
1520                 if (!mask)
1521                         mask = default_mask;
1522                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1523                         .src_ip = spec->hdr.src_addr,
1524                         .dst_ip = spec->hdr.dst_addr,
1525                         .proto = spec->hdr.next_proto_id,
1526                         .tos = spec->hdr.type_of_service,
1527                 };
1528                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1529                         .src_ip = mask->hdr.src_addr,
1530                         .dst_ip = mask->hdr.dst_addr,
1531                         .proto = mask->hdr.next_proto_id,
1532                         .tos = mask->hdr.type_of_service,
1533                 };
1534                 /* Remove unwanted bits from values. */
1535                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1536                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1537                 ipv4.val.proto &= ipv4.mask.proto;
1538                 ipv4.val.tos &= ipv4.mask.tos;
1539         }
1540         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1541         return 0;
1542 }
1543
1544 /**
1545  * Convert IPv6 item to Verbs specification.
1546  *
1547  * @param[in] item
1548  *   Item specification.
1549  * @param[in] default_mask
1550  *   Default bit-masks to use when item->mask is not provided.
1551  * @param[in, out] data
1552  *   User structure.
1553  *
1554  * @return
1555  *   0 on success, a negative errno value otherwise and rte_errno is set.
1556  */
1557 static int
1558 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1559                       const void *default_mask,
1560                       struct mlx5_flow_data *data)
1561 {
1562         struct priv *priv = data->dev->data->dev_private;
1563         const struct rte_flow_item_ipv6 *spec = item->spec;
1564         const struct rte_flow_item_ipv6 *mask = item->mask;
1565         struct mlx5_flow_parse *parser = data->parser;
1566         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1567         struct ibv_flow_spec_ipv6 ipv6 = {
1568                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1569                 .size = ipv6_size,
1570         };
1571
1572         if (parser->layer == HASH_RXQ_TUNNEL &&
1573             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1574             !priv->config.l3_vxlan_en)
1575                 return rte_flow_error_set(data->error, EINVAL,
1576                                           RTE_FLOW_ERROR_TYPE_ITEM,
1577                                           item,
1578                                           "L3 VXLAN not enabled by device"
1579                                           " parameter and/or not configured"
1580                                           " in firmware");
1581         parser->layer = HASH_RXQ_IPV6;
1582         if (spec) {
1583                 unsigned int i;
1584                 uint32_t vtc_flow_val;
1585                 uint32_t vtc_flow_mask;
1586
1587                 if (!mask)
1588                         mask = default_mask;
1589                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1590                        RTE_DIM(ipv6.val.src_ip));
1591                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1592                        RTE_DIM(ipv6.val.dst_ip));
1593                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1594                        RTE_DIM(ipv6.mask.src_ip));
1595                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1596                        RTE_DIM(ipv6.mask.dst_ip));
1597                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1598                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1599                 ipv6.val.flow_label =
1600                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1601                                          IPV6_HDR_FL_SHIFT);
1602                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1603                                          IPV6_HDR_TC_SHIFT;
1604                 ipv6.val.next_hdr = spec->hdr.proto;
1605                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1606                 ipv6.mask.flow_label =
1607                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1608                                          IPV6_HDR_FL_SHIFT);
1609                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1610                                           IPV6_HDR_TC_SHIFT;
1611                 ipv6.mask.next_hdr = mask->hdr.proto;
1612                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1613                 /* Remove unwanted bits from values. */
1614                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1615                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1616                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1617                 }
1618                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1619                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1620                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1621                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1622         }
1623         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1624         return 0;
1625 }
1626
1627 /**
1628  * Convert UDP item to Verbs specification.
1629  *
1630  * @param[in] item
1631  *   Item specification.
1632  * @param[in] default_mask
1633  *   Default bit-masks to use when item->mask is not provided.
1634  * @param[in, out] data
1635  *   User structure.
1636  *
1637  * @return
1638  *   0 on success, a negative errno value otherwise and rte_errno is set.
1639  */
1640 static int
1641 mlx5_flow_create_udp(const struct rte_flow_item *item,
1642                      const void *default_mask,
1643                      struct mlx5_flow_data *data)
1644 {
1645         const struct rte_flow_item_udp *spec = item->spec;
1646         const struct rte_flow_item_udp *mask = item->mask;
1647         struct mlx5_flow_parse *parser = data->parser;
1648         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1649         struct ibv_flow_spec_tcp_udp udp = {
1650                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1651                 .size = udp_size,
1652         };
1653
1654         if (parser->layer == HASH_RXQ_IPV4)
1655                 parser->layer = HASH_RXQ_UDPV4;
1656         else
1657                 parser->layer = HASH_RXQ_UDPV6;
1658         if (spec) {
1659                 if (!mask)
1660                         mask = default_mask;
1661                 udp.val.dst_port = spec->hdr.dst_port;
1662                 udp.val.src_port = spec->hdr.src_port;
1663                 udp.mask.dst_port = mask->hdr.dst_port;
1664                 udp.mask.src_port = mask->hdr.src_port;
1665                 /* Remove unwanted bits from values. */
1666                 udp.val.src_port &= udp.mask.src_port;
1667                 udp.val.dst_port &= udp.mask.dst_port;
1668         }
1669         mlx5_flow_create_copy(parser, &udp, udp_size);
1670         return 0;
1671 }
1672
1673 /**
1674  * Convert TCP item to Verbs specification.
1675  *
1676  * @param[in] item
1677  *   Item specification.
1678  * @param[in] default_mask
1679  *   Default bit-masks to use when item->mask is not provided.
1680  * @param[in, out] data
1681  *   User structure.
1682  *
1683  * @return
1684  *   0 on success, a negative errno value otherwise and rte_errno is set.
1685  */
1686 static int
1687 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1688                      const void *default_mask,
1689                      struct mlx5_flow_data *data)
1690 {
1691         const struct rte_flow_item_tcp *spec = item->spec;
1692         const struct rte_flow_item_tcp *mask = item->mask;
1693         struct mlx5_flow_parse *parser = data->parser;
1694         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1695         struct ibv_flow_spec_tcp_udp tcp = {
1696                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1697                 .size = tcp_size,
1698         };
1699
1700         if (parser->layer == HASH_RXQ_IPV4)
1701                 parser->layer = HASH_RXQ_TCPV4;
1702         else
1703                 parser->layer = HASH_RXQ_TCPV6;
1704         if (spec) {
1705                 if (!mask)
1706                         mask = default_mask;
1707                 tcp.val.dst_port = spec->hdr.dst_port;
1708                 tcp.val.src_port = spec->hdr.src_port;
1709                 tcp.mask.dst_port = mask->hdr.dst_port;
1710                 tcp.mask.src_port = mask->hdr.src_port;
1711                 /* Remove unwanted bits from values. */
1712                 tcp.val.src_port &= tcp.mask.src_port;
1713                 tcp.val.dst_port &= tcp.mask.dst_port;
1714         }
1715         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1716         return 0;
1717 }
1718
1719 /**
1720  * Convert VXLAN item to Verbs specification.
1721  *
1722  * @param[in] item
1723  *   Item specification.
1724  * @param[in] default_mask
1725  *   Default bit-masks to use when item->mask is not provided.
1726  * @param[in, out] data
1727  *   User structure.
1728  *
1729  * @return
1730  *   0 on success, a negative errno value otherwise and rte_errno is set.
1731  */
1732 static int
1733 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1734                        const void *default_mask,
1735                        struct mlx5_flow_data *data)
1736 {
1737         const struct rte_flow_item_vxlan *spec = item->spec;
1738         const struct rte_flow_item_vxlan *mask = item->mask;
1739         struct mlx5_flow_parse *parser = data->parser;
1740         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1741         struct ibv_flow_spec_tunnel vxlan = {
1742                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1743                 .size = size,
1744         };
1745         union vni {
1746                 uint32_t vlan_id;
1747                 uint8_t vni[4];
1748         } id;
1749
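        /*
         * The 24-bit VNI is copied into the three low-order bytes of a
         * zero-initialized 32-bit word, yielding the network-order tunnel
         * id expected by the Verbs specification.
         */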
1750         id.vni[0] = 0;
1751         parser->inner = IBV_FLOW_SPEC_INNER;
1752         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1753         parser->out_layer = parser->layer;
1754         parser->layer = HASH_RXQ_TUNNEL;
1755         /* Default VXLAN to outer RSS. */
1756         if (!parser->rss_conf.level)
1757                 parser->rss_conf.level = 1;
1758         if (spec) {
1759                 if (!mask)
1760                         mask = default_mask;
1761                 memcpy(&id.vni[1], spec->vni, 3);
1762                 vxlan.val.tunnel_id = id.vlan_id;
1763                 memcpy(&id.vni[1], mask->vni, 3);
1764                 vxlan.mask.tunnel_id = id.vlan_id;
1765                 /* Remove unwanted bits from values. */
1766                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1767         }
1768         /*
1769          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1770          * this layer is defined in the Verbs specification, it is
1771          * interpreted as a wildcard and all packets will match this rule;
1772          * if it follows a full stack layer (e.g. eth / ipv4 / udp), all
1773          * packets matching the preceding layers will also match this rule.
1774          * To avoid such a situation, VNI 0 is currently refused.
1775          */
1776         /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1777         if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1778                 return rte_flow_error_set(data->error, EINVAL,
1779                                           RTE_FLOW_ERROR_TYPE_ITEM,
1780                                           item,
1781                                           "VxLAN vni cannot be 0");
1782         mlx5_flow_create_copy(parser, &vxlan, size);
1783         return 0;
1784 }
1785
1786 /**
1787  * Convert VXLAN-GPE item to Verbs specification.
1788  *
1789  * @param[in] item
1790  *   Item specification.
1791  * @param[in] default_mask
1792  *   Default bit-masks to use when item->mask is not provided.
1793  * @param[in, out] data
1794  *   User structure.
1795  *
1796  * @return
1797  *   0 on success, a negative errno value otherwise and rte_errno is set.
1798  */
1799 static int
1800 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1801                            const void *default_mask,
1802                            struct mlx5_flow_data *data)
1803 {
1804         struct priv *priv = data->dev->data->dev_private;
1805         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1806         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1807         struct mlx5_flow_parse *parser = data->parser;
1808         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1809         struct ibv_flow_spec_tunnel vxlan = {
1810                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1811                 .size = size,
1812         };
1813         union vni {
1814                 uint32_t vlan_id;
1815                 uint8_t vni[4];
1816         } id;
1817
1818         if (!priv->config.l3_vxlan_en)
1819                 return rte_flow_error_set(data->error, EINVAL,
1820                                           RTE_FLOW_ERROR_TYPE_ITEM,
1821                                           item,
1822                                           "L3 VXLAN not enabled by device"
1823                                           " parameter and/or not configured"
1824                                           " in firmware");
1825         id.vni[0] = 0;
1826         parser->inner = IBV_FLOW_SPEC_INNER;
1827         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1828         parser->out_layer = parser->layer;
1829         parser->layer = HASH_RXQ_TUNNEL;
1830         /* Default VXLAN-GPE to outer RSS. */
1831         if (!parser->rss_conf.level)
1832                 parser->rss_conf.level = 1;
1833         if (spec) {
1834                 if (!mask)
1835                         mask = default_mask;
1836                 memcpy(&id.vni[1], spec->vni, 3);
1837                 vxlan.val.tunnel_id = id.vlan_id;
1838                 memcpy(&id.vni[1], mask->vni, 3);
1839                 vxlan.mask.tunnel_id = id.vlan_id;
1840                 if (spec->protocol)
1841                         return rte_flow_error_set(data->error, EINVAL,
1842                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1843                                                   item,
1844                                                   "VxLAN-GPE protocol not"
1845                                                   " supported");
1846                 /* Remove unwanted bits from values. */
1847                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1848         }
1849         /*
1850          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1851          * this layer is defined in the Verbs specification, it is
1852          * interpreted as a wildcard and all packets will match this rule;
1853          * if it follows a full stack layer (e.g. eth / ipv4 / udp), all
1854          * packets matching the preceding layers will also match this rule.
1855          * To avoid such a situation, VNI 0 is currently refused.
1856          */
1857         /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1858         if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1859                 return rte_flow_error_set(data->error, EINVAL,
1860                                           RTE_FLOW_ERROR_TYPE_ITEM,
1861                                           item,
1862                                           "VxLAN-GPE vni cannot be 0");
1863         mlx5_flow_create_copy(parser, &vxlan, size);
1864         return 0;
1865 }
1866
1867 /**
1868  * Convert GRE item to Verbs specification.
1869  *
1870  * @param[in] item
1871  *   Item specification.
1872  * @param[in] default_mask
1873  *   Default bit-masks to use when item->mask is not provided.
1874  * @param[in, out] data
1875  *   User structure.
1876  *
1877  * @return
1878  *   0 on success, a negative errno value otherwise and rte_errno is set.
1879  */
1880 static int
1881 mlx5_flow_create_gre(const struct rte_flow_item *item,
1882                      const void *default_mask __rte_unused,
1883                      struct mlx5_flow_data *data)
1884 {
1885         struct mlx5_flow_parse *parser = data->parser;
1886         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1887         struct ibv_flow_spec_tunnel tunnel = {
1888                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1889                 .size = size,
1890         };
1891         struct ibv_flow_spec_ipv4_ext *ipv4;
1892         struct ibv_flow_spec_ipv6 *ipv6;
1893         unsigned int i;
1894
1895         parser->inner = IBV_FLOW_SPEC_INNER;
1896         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1897         parser->out_layer = parser->layer;
1898         parser->layer = HASH_RXQ_TUNNEL;
1899         /* Default GRE to inner RSS. */
1900         if (!parser->rss_conf.level)
1901                 parser->rss_conf.level = 2;
1902         /* Update encapsulation IP layer protocol. */
1903         for (i = 0; i != hash_rxq_init_n; ++i) {
1904                 if (!parser->queue[i].ibv_attr)
1905                         continue;
1906                 if (parser->out_layer == HASH_RXQ_IPV4) {
1907                         ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1908                                 parser->queue[i].offset -
1909                                 sizeof(struct ibv_flow_spec_ipv4_ext));
1910                         if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1911                                 break;
1912                         ipv4->val.proto = MLX5_GRE;
1913                         ipv4->mask.proto = 0xff;
1914                 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1915                         ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1916                                 parser->queue[i].offset -
1917                                 sizeof(struct ibv_flow_spec_ipv6));
1918                         if (ipv6->mask.next_hdr &&
1919                             ipv6->val.next_hdr != MLX5_GRE)
1920                                 break;
1921                         ipv6->val.next_hdr = MLX5_GRE;
1922                         ipv6->mask.next_hdr = 0xff;
1923                 }
1924         }
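        /*
         * An early break above means the pattern already pinned the outer
         * IP protocol to a value other than GRE (47).
         */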
1925         if (i != hash_rxq_init_n)
1926                 return rte_flow_error_set(data->error, EINVAL,
1927                                           RTE_FLOW_ERROR_TYPE_ITEM,
1928                                           item,
1929                                           "IP protocol of GRE must be 47");
1930         mlx5_flow_create_copy(parser, &tunnel, size);
1931         return 0;
1932 }
1933
1934 /**
1935  * Convert mark/flag action to Verbs specification.
1936  *
1937  * @param parser
1938  *   Internal parser structure.
1939  * @param mark_id
1940  *   Mark identifier.
1941  *
1942  * @return
1943  *   0 on success, a negative errno value otherwise and rte_errno is set.
1944  */
1945 static int
1946 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1947 {
1948         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1949         struct ibv_flow_spec_action_tag tag = {
1950                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1951                 .size = size,
1952                 .tag_id = mlx5_flow_mark_set(mark_id),
1953         };
1954
1955         assert(parser->mark);
1956         mlx5_flow_create_copy(parser, &tag, size);
1957         return 0;
1958 }
1959
1960 /**
1961  * Convert count action to Verbs specification.
1962  *
1963  * @param dev
1964  *   Pointer to Ethernet device.
1965  * @param parser
1966  *   Pointer to MLX5 flow parser structure.
1967  *
1968  * @return
1969  *   0 on success, a negative errno value otherwise and rte_errno is set.
1970  */
1971 static int
1972 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1973                        struct mlx5_flow_parse *parser __rte_unused)
1974 {
1975 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1976         struct priv *priv = dev->data->dev_private;
1977         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1978         struct ibv_counter_set_init_attr init_attr = {0};
1979         struct ibv_flow_spec_counter_action counter = {
1980                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1981                 .size = size,
1982                 .counter_set_handle = 0,
1983         };
1984
1985         init_attr.counter_set_id = 0;
1986         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1987         if (!parser->cs) {
1988                 rte_errno = EINVAL;
1989                 return -rte_errno;
1990         }
1991         counter.counter_set_handle = parser->cs->handle;
1992         mlx5_flow_create_copy(parser, &counter, size);
1993 #endif
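        /*
         * Without counter support parser->cs stays NULL and flow creation
         * fails in mlx5_flow_convert() with "cannot create counter".
         */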
1994         return 0;
1995 }
1996
1997 /**
1998  * Complete flow rule creation with a drop queue.
1999  *
2000  * @param dev
2001  *   Pointer to Ethernet device.
2002  * @param parser
2003  *   Internal parser structure.
2004  * @param flow
2005  *   Pointer to the rte_flow.
2006  * @param[out] error
2007  *   Perform verbose error reporting if not NULL.
2008  *
2009  * @return
2010  *   0 on success, a negative errno value otherwise and rte_errno is set.
2011  */
2012 static int
2013 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2014                                    struct mlx5_flow_parse *parser,
2015                                    struct rte_flow *flow,
2016                                    struct rte_flow_error *error)
2017 {
2018         struct priv *priv = dev->data->dev_private;
2019         struct ibv_flow_spec_action_drop *drop;
2020         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2021
2022         assert(priv->pd);
2023         assert(priv->ctx);
2024         flow->drop = 1;
2025         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2026                         parser->queue[HASH_RXQ_ETH].offset);
2027         *drop = (struct ibv_flow_spec_action_drop){
2028                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2029                         .size = size,
2030         };
2031         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2032         parser->queue[HASH_RXQ_ETH].offset += size;
2033         flow->frxq[HASH_RXQ_ETH].ibv_attr =
2034                 parser->queue[HASH_RXQ_ETH].ibv_attr;
2035         if (parser->count)
2036                 flow->cs = parser->cs;
2037         if (!dev->data->dev_started)
2038                 return 0;
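        /*
         * The Verbs attribute now belongs to the flow; clear the parser's
         * reference so it cannot be freed twice.
         */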
2039         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2040         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2041                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2042                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
2043         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2044                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2045                                    NULL, "flow rule creation failure");
2046                 goto error;
2047         }
2048         return 0;
2049 error:
2050         assert(flow);
2051         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2052                 claim_zero(mlx5_glue->destroy_flow
2053                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2054                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2055         }
2056         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2057                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2058                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2059         }
2060         if (flow->cs) {
2061                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2062                 flow->cs = NULL;
2063                 parser->cs = NULL;
2064         }
2065         return -rte_errno;
2066 }
2067
2068 /**
2069  * Create hash Rx queues when RSS is enabled.
2070  *
2071  * @param dev
2072  *   Pointer to Ethernet device.
2073  * @param parser
2074  *   Internal parser structure.
2075  * @param flow
2076  *   Pointer to the rte_flow.
2077  * @param[out] error
2078  *   Perform verbose error reporting if not NULL.
2079  *
2080  * @return
2081  *   0 on success, a negative errno value otherwise and rte_errno is set.
2082  */
2083 static int
2084 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2085                                   struct mlx5_flow_parse *parser,
2086                                   struct rte_flow *flow,
2087                                   struct rte_flow_error *error)
2088 {
2089         unsigned int i;
2090
2091         for (i = 0; i != hash_rxq_init_n; ++i) {
2092                 if (!parser->queue[i].ibv_attr)
2093                         continue;
2094                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2095                 parser->queue[i].ibv_attr = NULL;
2096                 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2097                 if (!dev->data->dev_started)
2098                         continue;
2099                 flow->frxq[i].hrxq =
2100                         mlx5_hrxq_get(dev,
2101                                       parser->rss_conf.key,
2102                                       parser->rss_conf.key_len,
2103                                       flow->frxq[i].hash_fields,
2104                                       parser->rss_conf.queue,
2105                                       parser->rss_conf.queue_num,
2106                                       parser->tunnel,
2107                                       parser->rss_conf.level);
2108                 if (flow->frxq[i].hrxq)
2109                         continue;
2110                 flow->frxq[i].hrxq =
2111                         mlx5_hrxq_new(dev,
2112                                       parser->rss_conf.key,
2113                                       parser->rss_conf.key_len,
2114                                       flow->frxq[i].hash_fields,
2115                                       parser->rss_conf.queue,
2116                                       parser->rss_conf.queue_num,
2117                                       parser->tunnel,
2118                                       parser->rss_conf.level);
2119                 if (!flow->frxq[i].hrxq) {
2120                         return rte_flow_error_set(error, ENOMEM,
2121                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
2122                                                   NULL,
2123                                                   "cannot create hash rxq");
2124                 }
2125         }
2126         return 0;
2127 }
2128
2129 /**
2130  * RXQ update after flow rule creation.
2131  *
2132  * @param dev
2133  *   Pointer to Ethernet device.
2134  * @param flow
2135  *   Pointer to the flow rule.
2136  */
2137 static void
2138 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2139 {
2140         struct priv *priv = dev->data->dev_private;
2141         unsigned int i;
2142         unsigned int j;
2143
2144         if (!dev->data->dev_started)
2145                 return;
2146         for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2147                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2148                                                  [(*flow->queues)[i]];
2149                 struct mlx5_rxq_ctrl *rxq_ctrl =
2150                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2151                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2152
2153                 rxq_data->mark |= flow->mark;
2154                 if (!tunnel)
2155                         continue;
2156                 rxq_ctrl->tunnel_types[tunnel] += 1;
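                /*
                 * tunnel_types[] counts active flows per tunnel kind; a
                 * queue reports a tunnel packet type only while exactly one
                 * kind is in use.
                 */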
2157                 /* Clear the tunnel type if more than one tunnel type is set. */
2158                 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2159                         if (j == tunnel)
2160                                 continue;
2161                         if (rxq_ctrl->tunnel_types[j] > 0) {
2162                                 rxq_data->tunnel = 0;
2163                                 break;
2164                         }
2165                 }
2166                 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2167                         rxq_data->tunnel = flow->tunnel;
2168         }
2169 }
2170
2171 /**
2172  * Dump flow hash RX queue detail.
2173  *
2174  * @param dev
2175  *   Pointer to Ethernet device.
2176  * @param flow
2177  *   Pointer to the rte_flow.
2178  * @param hrxq_idx
2179  *   Hash RX queue index.
2180  */
2181 static void
2182 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2183                struct rte_flow *flow __rte_unused,
2184                unsigned int hrxq_idx __rte_unused)
2185 {
2186 #ifndef NDEBUG
2187         uintptr_t spec_ptr;
2188         uint16_t j;
2189         char buf[256];
2190         uint8_t off;
2191
2192         spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2193         for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2194              j++) {
2195                 struct ibv_flow_spec *spec = (void *)spec_ptr;
2196                 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2197                                spec->hdr.size);
2198                 spec_ptr += spec->hdr.size;
2199         }
2200         DRV_LOG(DEBUG,
2201                 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2202                 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2203                 " flags:%x, comp_mask:%x specs:%s",
2204                 dev->data->port_id, (void *)flow, hrxq_idx,
2205                 (void *)flow->frxq[hrxq_idx].hrxq,
2206                 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2207                 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2208                 flow->frxq[hrxq_idx].hash_fields |
2209                 (flow->tunnel &&
2210                  flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2211                 flow->rss_conf.queue_num,
2212                 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2213                 flow->frxq[hrxq_idx].ibv_attr->size,
2214                 flow->frxq[hrxq_idx].ibv_attr->priority,
2215                 flow->frxq[hrxq_idx].ibv_attr->type,
2216                 flow->frxq[hrxq_idx].ibv_attr->flags,
2217                 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2218                 buf);
2219 #endif
2220 }
2221
2222 /**
2223  * Complete flow rule creation.
2224  *
2225  * @param dev
2226  *   Pointer to Ethernet device.
2227  * @param parser
2228  *   Internal parser structure.
2229  * @param flow
2230  *   Pointer to the rte_flow.
2231  * @param[out] error
2232  *   Perform verbose error reporting if not NULL.
2233  *
2234  * @return
2235  *   0 on success, a negative errno value otherwise and rte_errno is set.
2236  */
2237 static int
2238 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2239                               struct mlx5_flow_parse *parser,
2240                               struct rte_flow *flow,
2241                               struct rte_flow_error *error)
2242 {
2243         struct priv *priv __rte_unused = dev->data->dev_private;
2244         int ret;
2245         unsigned int i;
2246         unsigned int flows_n = 0;
2247
2248         assert(priv->pd);
2249         assert(priv->ctx);
2250         assert(!parser->drop);
2251         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2252         if (ret)
2253                 goto error;
2254         if (parser->count)
2255                 flow->cs = parser->cs;
2256         if (!dev->data->dev_started)
2257                 return 0;
2258         for (i = 0; i != hash_rxq_init_n; ++i) {
2259                 if (!flow->frxq[i].hrxq)
2260                         continue;
2261                 flow->frxq[i].ibv_flow =
2262                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2263                                                flow->frxq[i].ibv_attr);
2264                 mlx5_flow_dump(dev, flow, i);
2265                 if (!flow->frxq[i].ibv_flow) {
2266                         rte_flow_error_set(error, ENOMEM,
2267                                            RTE_FLOW_ERROR_TYPE_HANDLE,
2268                                            NULL, "flow rule creation failure");
2269                         goto error;
2270                 }
2271                 ++flows_n;
2272         }
2273         if (!flows_n) {
2274                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2275                                    NULL, "internal error in flow creation");
2276                 goto error;
2277         }
2278         mlx5_flow_create_update_rxqs(dev, flow);
2279         return 0;
2280 error:
2281         ret = rte_errno; /* Save rte_errno before cleanup. */
2282         assert(flow);
2283         for (i = 0; i != hash_rxq_init_n; ++i) {
2284                 if (flow->frxq[i].ibv_flow) {
2285                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2286
2287                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2288                 }
2289                 if (flow->frxq[i].hrxq)
2290                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2291                 if (flow->frxq[i].ibv_attr)
2292                         rte_free(flow->frxq[i].ibv_attr);
2293         }
2294         if (flow->cs) {
2295                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2296                 flow->cs = NULL;
2297                 parser->cs = NULL;
2298         }
2299         rte_errno = ret; /* Restore rte_errno. */
2300         return -rte_errno;
2301 }
2302
2303 /**
2304  * Convert a flow.
2305  *
2306  * @param dev
2307  *   Pointer to Ethernet device.
2308  * @param list
2309  *   Pointer to a TAILQ flow list.
2310  * @param[in] attr
2311  *   Flow rule attributes.
2312  * @param[in] items
2313  *   Pattern specification (list terminated by the END pattern item).
2314  * @param[in] actions
2315  *   Associated actions (list terminated by the END action).
2316  * @param[out] error
2317  *   Perform verbose error reporting if not NULL.
2318  *
2319  * @return
2320  *   A flow on success, NULL otherwise and rte_errno is set.
2321  */
2322 static struct rte_flow *
2323 mlx5_flow_list_create(struct rte_eth_dev *dev,
2324                       struct mlx5_flows *list,
2325                       const struct rte_flow_attr *attr,
2326                       const struct rte_flow_item items[],
2327                       const struct rte_flow_action actions[],
2328                       struct rte_flow_error *error)
2329 {
2330         struct mlx5_flow_parse parser = { .create = 1, };
2331         struct rte_flow *flow = NULL;
2332         unsigned int i;
2333         int ret;
2334
2335         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2336         if (ret)
2337                 goto exit;
2338         flow = rte_calloc(__func__, 1,
2339                           sizeof(*flow) +
2340                           parser.rss_conf.queue_num * sizeof(uint16_t),
2341                           0);
2342         if (!flow) {
2343                 rte_flow_error_set(error, ENOMEM,
2344                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2345                                    NULL,
2346                                    "cannot allocate flow memory");
2347                 return NULL;
2348         }
2349         /* Copy configuration. */
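        /* The queue array lives in the same allocation, right after the flow. */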
2350         flow->queues = (uint16_t (*)[])(flow + 1);
2351         flow->tunnel = parser.tunnel;
2352         flow->rss_conf = (struct rte_flow_action_rss){
2353                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2354                 .level = 0,
2355                 .types = parser.rss_conf.types,
2356                 .key_len = parser.rss_conf.key_len,
2357                 .queue_num = parser.rss_conf.queue_num,
2358                 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2359                               sizeof(*parser.rss_conf.key) *
2360                               parser.rss_conf.key_len),
2361                 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2362                                 sizeof(*parser.rss_conf.queue) *
2363                                 parser.rss_conf.queue_num),
2364         };
2365         flow->mark = parser.mark;
2366         /* Finalise the flow. */
2367         if (parser.drop)
2368                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2369                                                          error);
2370         else
2371                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2372         if (ret)
2373                 goto exit;
2374         TAILQ_INSERT_TAIL(list, flow, next);
2375         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2376                 (void *)flow);
2377         return flow;
2378 exit:
2379         DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2380                 error->message);
2381         for (i = 0; i != hash_rxq_init_n; ++i) {
2382                 if (parser.queue[i].ibv_attr)
2383                         rte_free(parser.queue[i].ibv_attr);
2384         }
2385         rte_free(flow);
2386         return NULL;
2387 }
2388
2389 /**
2390  * Validate a flow supported by the NIC.
2391  *
2392  * @see rte_flow_validate()
2393  * @see rte_flow_ops
2394  */
2395 int
2396 mlx5_flow_validate(struct rte_eth_dev *dev,
2397                    const struct rte_flow_attr *attr,
2398                    const struct rte_flow_item items[],
2399                    const struct rte_flow_action actions[],
2400                    struct rte_flow_error *error)
2401 {
2402         struct mlx5_flow_parse parser = { .create = 0, };
2403
2404         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2405 }
2406
2407 /**
2408  * Create a flow.
2409  *
2410  * @see rte_flow_create()
2411  * @see rte_flow_ops
2412  */
2413 struct rte_flow *
2414 mlx5_flow_create(struct rte_eth_dev *dev,
2415                  const struct rte_flow_attr *attr,
2416                  const struct rte_flow_item items[],
2417                  const struct rte_flow_action actions[],
2418                  struct rte_flow_error *error)
2419 {
2420         struct priv *priv = dev->data->dev_private;
2421
2422         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2423                                      error);
2424 }
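
/*
 * Illustrative usage sketch (not part of the driver): a minimal ingress
 * drop rule built through the public rte_flow API, which lands in the
 * entry points above via rte_flow_ops. The port id (0) is an example
 * value.
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions, &err);
 *
 * rte_flow_validate() takes the same arguments and exercises the same
 * conversion path with parser.create == 0.
 */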
2425
2426 /**
2427  * Destroy a flow in a list.
2428  *
2429  * @param dev
2430  *   Pointer to Ethernet device.
2431  * @param list
2432  *   Pointer to a TAILQ flow list.
2433  * @param[in] flow
2434  *   Flow to destroy.
2435  */
2436 static void
2437 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2438                        struct rte_flow *flow)
2439 {
2440         struct priv *priv = dev->data->dev_private;
2441         unsigned int i;
2442
2443         if (flow->drop || !dev->data->dev_started)
2444                 goto free;
2445         for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2446                 /* Update queue tunnel type. */
2447                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2448                                                  [(*flow->queues)[i]];
2449                 struct mlx5_rxq_ctrl *rxq_ctrl =
2450                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2451                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2452
2453                 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2454                 rxq_ctrl->tunnel_types[tunnel] -= 1;
2455                 if (!rxq_ctrl->tunnel_types[tunnel]) {
2456                         /* Update tunnel type. */
2457                         uint8_t j;
2458                         uint8_t types = 0;
2459                         uint8_t last;
2460
2461                         for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2462                                 if (rxq_ctrl->tunnel_types[j]) {
2463                                         types += 1;
2464                                         last = j;
2465                                 }
2466                         /* Keep the current type if more than one tunnel type is left. */
2467                         if (types == 1)
2468                                 rxq_data->tunnel = ptype_ext[last];
2469                         else if (types == 0)
2470                                 /* No tunnel type left. */
2471                                 rxq_data->tunnel = 0;
2472                 }
2473         }
2474         for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2475                 struct rte_flow *tmp;
2476                 int mark = 0;
2477
2478                 /*
2479                  * To remove the mark from the queue, the queue must not be
2480                  * present in any other marked flow (RSS or not).
2481                  */
2482                 TAILQ_FOREACH(tmp, list, next) {
2483                         unsigned int j;
2484                         uint16_t *tqs = NULL;
2485                         uint16_t tq_n = 0;
2486
2487                         if (!tmp->mark)
2488                                 continue;
2489                         for (j = 0; j != hash_rxq_init_n; ++j) {
2490                                 if (!tmp->frxq[j].hrxq)
2491                                         continue;
2492                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2493                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2494                         }
2495                         if (!tq_n)
2496                                 continue;
2497                         for (j = 0; (j != tq_n) && !mark; j++)
2498                                 if (tqs[j] == (*flow->queues)[i])
2499                                         mark = 1;
2500                 }
2501                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2502         }
2503 free:
2504         if (flow->drop) {
2505                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2506                         claim_zero(mlx5_glue->destroy_flow
2507                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2508                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2509         } else {
2510                 for (i = 0; i != hash_rxq_init_n; ++i) {
2511                         struct mlx5_flow *frxq = &flow->frxq[i];
2512
2513                         if (frxq->ibv_flow)
2514                                 claim_zero(mlx5_glue->destroy_flow
2515                                            (frxq->ibv_flow));
2516                         if (frxq->hrxq)
2517                                 mlx5_hrxq_release(dev, frxq->hrxq);
2518                         if (frxq->ibv_attr)
2519                                 rte_free(frxq->ibv_attr);
2520                 }
2521         }
2522         if (flow->cs) {
2523                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2524                 flow->cs = NULL;
2525         }
2526         TAILQ_REMOVE(list, flow, next);
2527         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2528                 (void *)flow);
2529         rte_free(flow);
2530 }
2531
2532 /**
2533  * Destroy all flows.
2534  *
2535  * @param dev
2536  *   Pointer to Ethernet device.
2537  * @param list
2538  *   Pointer to a TAILQ flow list.
2539  */
2540 void
2541 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2542 {
2543         while (!TAILQ_EMPTY(list)) {
2544                 struct rte_flow *flow;
2545
2546                 flow = TAILQ_FIRST(list);
2547                 mlx5_flow_list_destroy(dev, list, flow);
2548         }
2549 }
2550
2551 /**
2552  * Create drop queue.
2553  *
2554  * @param dev
2555  *   Pointer to Ethernet device.
2556  *
2557  * @return
2558  *   0 on success, a negative errno value otherwise and rte_errno is set.
2559  */
2560 int
2561 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2562 {
2563         struct priv *priv = dev->data->dev_private;
2564         struct mlx5_hrxq_drop *fdq = NULL;
2565
2566         assert(priv->pd);
2567         assert(priv->ctx);
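        /*
         * Verbs needs a QP to attach flow rules to, even for drop rules.
         * Build a minimal hash QP (no hash fields, a single placeholder WQ)
         * to serve as that attachment point.
         */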
2568         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2569         if (!fdq) {
2570                 DRV_LOG(WARNING,
2571                         "port %u cannot allocate memory for drop queue",
2572                         dev->data->port_id);
2573                 rte_errno = ENOMEM;
2574                 return -rte_errno;
2575         }
2576         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2577         if (!fdq->cq) {
2578                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2579                         dev->data->port_id);
2580                 rte_errno = errno;
2581                 goto error;
2582         }
2583         fdq->wq = mlx5_glue->create_wq
2584                 (priv->ctx,
2585                  &(struct ibv_wq_init_attr){
2586                         .wq_type = IBV_WQT_RQ,
2587                         .max_wr = 1,
2588                         .max_sge = 1,
2589                         .pd = priv->pd,
2590                         .cq = fdq->cq,
2591                  });
2592         if (!fdq->wq) {
2593                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2594                         dev->data->port_id);
2595                 rte_errno = errno;
2596                 goto error;
2597         }
2598         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2599                 (priv->ctx,
2600                  &(struct ibv_rwq_ind_table_init_attr){
2601                         .log_ind_tbl_size = 0,
2602                         .ind_tbl = &fdq->wq,
2603                         .comp_mask = 0,
2604                  });
2605         if (!fdq->ind_table) {
2606                 DRV_LOG(WARNING,
2607                         "port %u cannot allocate indirection table for drop"
2608                         " queue",
2609                         dev->data->port_id);
2610                 rte_errno = errno;
2611                 goto error;
2612         }
2613         fdq->qp = mlx5_glue->create_qp_ex
2614                 (priv->ctx,
2615                  &(struct ibv_qp_init_attr_ex){
2616                         .qp_type = IBV_QPT_RAW_PACKET,
2617                         .comp_mask =
2618                                 IBV_QP_INIT_ATTR_PD |
2619                                 IBV_QP_INIT_ATTR_IND_TABLE |
2620                                 IBV_QP_INIT_ATTR_RX_HASH,
2621                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2622                                 .rx_hash_function =
2623                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2624                                 .rx_hash_key_len = rss_hash_default_key_len,
2625                                 .rx_hash_key = rss_hash_default_key,
2626                                 .rx_hash_fields_mask = 0,
2627                                 },
2628                         .rwq_ind_tbl = fdq->ind_table,
2629                         .pd = priv->pd
2630                  });
2631         if (!fdq->qp) {
2632                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2633                         dev->data->port_id);
2634                 rte_errno = errno;
2635                 goto error;
2636         }
2637         priv->flow_drop_queue = fdq;
2638         return 0;
2639 error:
2640         if (fdq->qp)
2641                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2642         if (fdq->ind_table)
2643                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2644         if (fdq->wq)
2645                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2646         if (fdq->cq)
2647                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2648         /* fdq cannot be NULL here; rte_free() accepts NULL anyway. */
2649         rte_free(fdq);
2650         priv->flow_drop_queue = NULL;
2651         return -rte_errno;
2652 }
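
/*
 * Usage sketch (hypothetical device-start path): the drop queue must exist
 * before drop flows can be re-applied by mlx5_flow_start(), and it is paired
 * with mlx5_flow_delete_drop_queue() below on teardown.
 *
 *     if (mlx5_flow_create_drop_queue(dev)) {
 *             DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
 *                     dev->data->port_id, strerror(rte_errno));
 *             return -rte_errno;
 *     }
 */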
2653
2654 /**
2655  * Delete drop queue.
2656  *
2657  * @param dev
2658  *   Pointer to Ethernet device.
2659  */
2660 void
2661 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2662 {
2663         struct priv *priv = dev->data->dev_private;
2664         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2665
2666         if (!fdq)
2667                 return;
2668         if (fdq->qp)
2669                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2670         if (fdq->ind_table)
2671                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2672         if (fdq->wq)
2673                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2674         if (fdq->cq)
2675                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2676         rte_free(fdq);
2677         priv->flow_drop_queue = NULL;
2678 }
2679
2680 /**
2681  * Remove all flows from hardware while keeping them in the list.
2682  *
2683  * @param dev
2684  *   Pointer to Ethernet device.
2685  * @param list
2686  *   Pointer to a TAILQ flow list.
2687  */
2688 void
2689 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2690 {
2691         struct priv *priv = dev->data->dev_private;
2692         struct rte_flow *flow;
2693         unsigned int i;
2694
2695         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2696                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2697
2698                 if (flow->drop) {
2699                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2700                                 continue;
2701                         claim_zero(mlx5_glue->destroy_flow
2702                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2703                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2704                         DRV_LOG(DEBUG, "port %u flow %p removed",
2705                                 dev->data->port_id, (void *)flow);
2706                         /* Next flow. */
2707                         continue;
2708                 }
2709                 /* Verify the flow has not already been cleaned. */
2710                 for (i = 0; i != hash_rxq_init_n; ++i) {
2711                         if (!flow->frxq[i].ibv_flow)
2712                                 continue;
2713                         /*
2714                          * Save the indirection table: it may be needed
2715                          * later to clear the mark flag on the Rx queues.
2716                          * Keeping it here avoids looking it up again in
2717                          * a second loop.
2718                          */
2719                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2720                         break;
2721                 }
2722                 if (i == hash_rxq_init_n)
2723                         return;
2724                 if (flow->mark) {
2725                         assert(ind_tbl);
2726                         for (i = 0; i != ind_tbl->queues_n; ++i)
2727                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2728                 }
2729                 for (i = 0; i != hash_rxq_init_n; ++i) {
2730                         if (!flow->frxq[i].ibv_flow)
2731                                 continue;
2732                         claim_zero(mlx5_glue->destroy_flow
2733                                    (flow->frxq[i].ibv_flow));
2734                         flow->frxq[i].ibv_flow = NULL;
2735                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2736                         flow->frxq[i].hrxq = NULL;
2737                 }
2738                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2739                         (void *)flow);
2740         }
2741         /* Cleanup Rx queue tunnel info. */
2742         for (i = 0; i != priv->rxqs_n; ++i) {
2743                 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2744                 struct mlx5_rxq_ctrl *rxq_ctrl =
2745                         container_of(q, struct mlx5_rxq_ctrl, rxq);
2746
2747                 if (!q)
2748                         continue;
2749                 memset((void *)rxq_ctrl->tunnel_types, 0,
2750                        sizeof(rxq_ctrl->tunnel_types));
2751                 q->tunnel = 0;
2752         }
2753 }
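
/*
 * Sketch of the intended stop/start cycle (the reconfiguration step is an
 * assumption for illustration): the rte_flow objects stay queued in the list
 * while their Verbs flows are destroyed, then are re-applied on start.
 *
 *     mlx5_flow_stop(dev, &priv->flows);
 *     ... reconfigure Rx queues / RSS ...
 *     if (mlx5_flow_start(dev, &priv->flows))
 *             ... roll back and report rte_errno ...
 */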
2754
2755 /**
2756  * Apply all flows in the list to the device.
2757  *
2758  * @param dev
2759  *   Pointer to Ethernet device.
2760  * @param list
2761  *   Pointer to a TAILQ flow list.
2762  *
2763  * @return
2764  *   0 on success, a negative errno value otherwise and rte_errno is set.
2765  */
2766 int
2767 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2768 {
2769         struct priv *priv = dev->data->dev_private;
2770         struct rte_flow *flow;
2771
2772         TAILQ_FOREACH(flow, list, next) {
2773                 unsigned int i;
2774
2775                 if (flow->drop) {
2776                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2777                                 mlx5_glue->create_flow
2778                                 (priv->flow_drop_queue->qp,
2779                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2780                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2781                                 DRV_LOG(DEBUG,
2782                                         "port %u flow %p cannot be applied",
2783                                         dev->data->port_id, (void *)flow);
2784                                 rte_errno = EINVAL;
2785                                 return -rte_errno;
2786                         }
2787                         DRV_LOG(DEBUG, "port %u flow %p applied",
2788                                 dev->data->port_id, (void *)flow);
2789                         /* Next flow. */
2790                         continue;
2791                 }
2792                 for (i = 0; i != hash_rxq_init_n; ++i) {
2793                         if (!flow->frxq[i].ibv_attr)
2794                                 continue;
2795                         flow->frxq[i].hrxq =
2796                                 mlx5_hrxq_get(dev, flow->rss_conf.key,
2797                                               flow->rss_conf.key_len,
2798                                               flow->frxq[i].hash_fields,
2799                                               flow->rss_conf.queue,
2800                                               flow->rss_conf.queue_num,
2801                                               flow->tunnel,
2802                                               flow->rss_conf.level);
2803                         if (flow->frxq[i].hrxq)
2804                                 goto flow_create;
2805                         flow->frxq[i].hrxq =
2806                                 mlx5_hrxq_new(dev, flow->rss_conf.key,
2807                                               flow->rss_conf.key_len,
2808                                               flow->frxq[i].hash_fields,
2809                                               flow->rss_conf.queue,
2810                                               flow->rss_conf.queue_num,
2811                                               flow->tunnel,
2812                                               flow->rss_conf.level);
2813                         if (!flow->frxq[i].hrxq) {
2814                                 DRV_LOG(DEBUG,
2815                                         "port %u flow %p cannot create hash"
2816                                         " rxq",
2817                                         dev->data->port_id, (void *)flow);
2818                                 rte_errno = EINVAL;
2819                                 return -rte_errno;
2820                         }
2821 flow_create:
2822                         mlx5_flow_dump(dev, flow, i);
2823                         flow->frxq[i].ibv_flow =
2824                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2825                                                        flow->frxq[i].ibv_attr);
2826                         if (!flow->frxq[i].ibv_flow) {
2827                                 DRV_LOG(DEBUG,
2828                                         "port %u flow %p type %u cannot be"
2829                                         " applied",
2830                                         dev->data->port_id, (void *)flow, i);
2831                                 rte_errno = EINVAL;
2832                                 return -rte_errno;
2833                         }
2834                 }
2835                 mlx5_flow_create_update_rxqs(dev, flow);
2836         }
2837         return 0;
2838 }
2839
2840 /**
2841  * Verify the flow list is empty.
2842  *
2843  * @param dev
2844  *   Pointer to Ethernet device.
2845  *
2846  * @return the number of flows not released.
2847  */
2848 int
2849 mlx5_flow_verify(struct rte_eth_dev *dev)
2850 {
2851         struct priv *priv = dev->data->dev_private;
2852         struct rte_flow *flow;
2853         int ret = 0;
2854
2855         TAILQ_FOREACH(flow, &priv->flows, next) {
2856                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2857                         dev->data->port_id, (void *)flow);
2858                 ++ret;
2859         }
2860         return ret;
2861 }
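
/*
 * Usage sketch (assumed close path): a non-zero return counts leaked flows,
 * which a caller would typically log before final cleanup.
 *
 *     if (mlx5_flow_verify(dev))
 *             DRV_LOG(WARNING, "port %u some flows are still referenced",
 *                     dev->data->port_id);
 */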
2862
2863 /**
2864  * Enable a control flow configured from the control plane.
2865  *
2866  * @param dev
2867  *   Pointer to Ethernet device.
2868  * @param eth_spec
2869  *   An Ethernet flow spec to apply.
2870  * @param eth_mask
2871  *   An Ethernet flow mask to apply.
2872  * @param vlan_spec
2873  *   A VLAN flow spec to apply.
2874  * @param vlan_mask
2875  *   A VLAN flow mask to apply.
2876  *
2877  * @return
2878  *   0 on success, a negative errno value otherwise and rte_errno is set.
2879  */
2880 int
2881 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2882                     struct rte_flow_item_eth *eth_spec,
2883                     struct rte_flow_item_eth *eth_mask,
2884                     struct rte_flow_item_vlan *vlan_spec,
2885                     struct rte_flow_item_vlan *vlan_mask)
2886 {
2887         struct priv *priv = dev->data->dev_private;
2888         const struct rte_flow_attr attr = {
2889                 .ingress = 1,
2890                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2891         };
2892         struct rte_flow_item items[] = {
2893                 {
2894                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2895                         .spec = eth_spec,
2896                         .last = NULL,
2897                         .mask = eth_mask,
2898                 },
2899                 {
2900                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2901                                 RTE_FLOW_ITEM_TYPE_END,
2902                         .spec = vlan_spec,
2903                         .last = NULL,
2904                         .mask = vlan_mask,
2905                 },
2906                 {
2907                         .type = RTE_FLOW_ITEM_TYPE_END,
2908                 },
2909         };
2910         uint16_t queue[priv->reta_idx_n];
2911         struct rte_flow_action_rss action_rss = {
2912                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2913                 .level = 0,
2914                 .types = priv->rss_conf.rss_hf,
2915                 .key_len = priv->rss_conf.rss_key_len,
2916                 .queue_num = priv->reta_idx_n,
2917                 .key = priv->rss_conf.rss_key,
2918                 .queue = queue,
2919         };
2920         struct rte_flow_action actions[] = {
2921                 {
2922                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2923                         .conf = &action_rss,
2924                 },
2925                 {
2926                         .type = RTE_FLOW_ACTION_TYPE_END,
2927                 },
2928         };
2929         struct rte_flow *flow;
2930         struct rte_flow_error error;
2931         unsigned int i;
2932
2933         if (!priv->reta_idx_n) {
2934                 rte_errno = EINVAL;
2935                 return -rte_errno;
2936         }
2937         for (i = 0; i != priv->reta_idx_n; ++i)
2938                 queue[i] = (*priv->reta_idx)[i];
2939         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2940                                      actions, &error);
2941         if (!flow)
2942                 return -rte_errno;
2943         return 0;
2944 }
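
/*
 * Example (sketch): enabling a control flow matching one unicast MAC on one
 * VLAN. The MAC address and vlan_id are illustrative values, not driver code.
 *
 *     struct rte_flow_item_eth spec = {
 *             .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *     };
 *     struct rte_flow_item_eth mask = {
 *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *     struct rte_flow_item_vlan vlan_spec = {
 *             .tci = rte_cpu_to_be_16(vlan_id),
 *     };
 *     struct rte_flow_item_vlan vlan_mask = {
 *             .tci = rte_cpu_to_be_16(0x0fff),
 *     };
 *
 *     ret = mlx5_ctrl_flow_vlan(dev, &spec, &mask, &vlan_spec, &vlan_mask);
 */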
2945
2946 /**
2947  * Enable a control flow configured from the control plane.
2948  *
2949  * @param dev
2950  *   Pointer to Ethernet device.
2951  * @param eth_spec
2952  *   An Ethernet flow spec to apply.
2953  * @param eth_mask
2954  *   An Ethernet flow mask to apply.
2955  *
2956  * @return
2957  *   0 on success, a negative errno value otherwise and rte_errno is set.
2958  */
2959 int
2960 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2961                struct rte_flow_item_eth *eth_spec,
2962                struct rte_flow_item_eth *eth_mask)
2963 {
2964         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2965 }
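
/*
 * Example (sketch): enabling broadcast reception. Passing the same structure
 * as both spec and mask matches the full destination MAC address.
 *
 *     struct rte_flow_item_eth bcast = {
 *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *
 *     ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
 */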
2966
2967 /**
2968  * Destroy a flow.
2969  *
2970  * @see rte_flow_destroy()
2971  * @see rte_flow_ops
2972  */
2973 int
2974 mlx5_flow_destroy(struct rte_eth_dev *dev,
2975                   struct rte_flow *flow,
2976                   struct rte_flow_error *error __rte_unused)
2977 {
2978         struct priv *priv = dev->data->dev_private;
2979
2980         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2981         return 0;
2982 }
2983
2984 /**
2985  * Destroy all flows.
2986  *
2987  * @see rte_flow_flush()
2988  * @see rte_flow_ops
2989  */
2990 int
2991 mlx5_flow_flush(struct rte_eth_dev *dev,
2992                 struct rte_flow_error *error __rte_unused)
2993 {
2994         struct priv *priv = dev->data->dev_private;
2995
2996         mlx5_flow_list_flush(dev, &priv->flows);
2997         return 0;
2998 }
2999
3000 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3001 /**
3002  * Query flow counter.
3003  *
3004  * @param cs
3005  *   The counter set to read.
3006  * @param query_count
3007  *   Returned hit/byte counters, relative to the saved counter_stats.
3008  *
3009  * @return
3010  *   0 on success, a negative errno value otherwise and rte_errno is set.
3011  */
3012 static int
3013 mlx5_flow_query_count(struct ibv_counter_set *cs,
3014                       struct mlx5_flow_counter_stats *counter_stats,
3015                       struct rte_flow_query_count *query_count,
3016                       struct rte_flow_error *error)
3017 {
3018         uint64_t counters[2];
3019         struct ibv_query_counter_set_attr query_cs_attr = {
3020                 .cs = cs,
3021                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3022         };
3023         struct ibv_counter_set_data query_out = {
3024                 .out = counters,
3025                 .outlen = 2 * sizeof(uint64_t),
3026         };
3027         int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3028
3029         if (err)
3030                 return rte_flow_error_set(error, err,
3031                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3032                                           NULL,
3033                                           "cannot read counter");
3034         query_count->hits_set = 1;
3035         query_count->bytes_set = 1;
3036         query_count->hits = counters[0] - counter_stats->hits;
3037         query_count->bytes = counters[1] - counter_stats->bytes;
3038         if (query_count->reset) {
3039                 counter_stats->hits = counters[0];
3040                 counter_stats->bytes = counters[1];
3041         }
3042         return 0;
3043 }
3044
3045 /**
3046  * Query a flow.
3047  *
3048  * @see rte_flow_query()
3049  * @see rte_flow_ops
3050  */
3051 int
3052 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3053                 struct rte_flow *flow,
3054                 const struct rte_flow_action *action __rte_unused,
3055                 void *data,
3056                 struct rte_flow_error *error)
3057 {
3058         if (flow->cs) {
3059                 int ret;
3060
3061                 ret = mlx5_flow_query_count(flow->cs,
3062                                             &flow->counter_stats,
3063                                             (struct rte_flow_query_count *)data,
3064                                             error);
3065                 if (ret)
3066                         return ret;
3067         } else {
3068                 return rte_flow_error_set(error, EINVAL,
3069                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3070                                           NULL,
3071                                           "no counter found for flow");
3072         }
3073         return 0;
3074 }
3075 #endif
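
/*
 * Usage sketch (application side, assumed caller): mlx5_flow_query() above
 * backs rte_flow_query() when counter support is compiled in; reported
 * values are deltas since the last reset.
 *
 *     struct rte_flow_query_count count = { .reset = 1 };
 *     struct rte_flow_action count_action = {
 *             .type = RTE_FLOW_ACTION_TYPE_COUNT,
 *     };
 *     struct rte_flow_error err;
 *
 *     if (!rte_flow_query(port_id, flow, &count_action, &count, &err))
 *             printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                    count.hits, count.bytes);
 */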
3076
3077 /**
3078  * Enable or disable flow rule isolation mode.
3079  *
3080  * @see rte_flow_isolate()
3081  * @see rte_flow_ops
3082  */
3083 int
3084 mlx5_flow_isolate(struct rte_eth_dev *dev,
3085                   int enable,
3086                   struct rte_flow_error *error)
3087 {
3088         struct priv *priv = dev->data->dev_private;
3089
3090         if (dev->data->dev_started) {
3091                 rte_flow_error_set(error, EBUSY,
3092                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3093                                    NULL,
3094                                    "port must be stopped first");
3095                 return -rte_errno;
3096         }
3097         priv->isolated = !!enable;
3098         if (enable)
3099                 dev->dev_ops = &mlx5_dev_ops_isolate;
3100         else
3101                 dev->dev_ops = &mlx5_dev_ops;
3102         return 0;
3103 }
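
/*
 * Usage sketch (application side): isolation must be requested while the
 * port is stopped, typically right after rte_eth_dev_configure().
 *
 *     struct rte_flow_error err;
 *
 *     if (rte_flow_isolate(port_id, 1, &err))
 *             printf("cannot enable isolated mode: %s\n",
 *                    err.message ? err.message : "unknown");
 */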
3104
3105 /**
3106  * Convert a flow director filter to a generic flow.
3107  *
3108  * @param dev
3109  *   Pointer to Ethernet device.
3110  * @param fdir_filter
3111  *   Flow director filter to add.
3112  * @param attributes
3113  *   Generic flow parameters structure.
3114  *
3115  * @return
3116  *   0 on success, a negative errno value otherwise and rte_errno is set.
3117  */
3118 static int
3119 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3120                          const struct rte_eth_fdir_filter *fdir_filter,
3121                          struct mlx5_fdir *attributes)
3122 {
3123         struct priv *priv = dev->data->dev_private;
3124         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3125         const struct rte_eth_fdir_masks *mask =
3126                 &dev->data->dev_conf.fdir_conf.mask;
3127
3128         /* Validate queue number. */
3129         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3130                 DRV_LOG(ERR, "port %u invalid queue number %d",
3131                         dev->data->port_id, fdir_filter->action.rx_queue);
3132                 rte_errno = EINVAL;
3133                 return -rte_errno;
3134         }
3135         attributes->attr.ingress = 1;
3136         attributes->items[0] = (struct rte_flow_item) {
3137                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3138                 .spec = &attributes->l2,
3139                 .mask = &attributes->l2_mask,
3140         };
3141         switch (fdir_filter->action.behavior) {
3142         case RTE_ETH_FDIR_ACCEPT:
3143                 attributes->actions[0] = (struct rte_flow_action){
3144                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3145                         .conf = &attributes->queue,
3146                 };
3147                 break;
3148         case RTE_ETH_FDIR_REJECT:
3149                 attributes->actions[0] = (struct rte_flow_action){
3150                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3151                 };
3152                 break;
3153         default:
3154                 DRV_LOG(ERR, "port %u invalid behavior %d",
3155                         dev->data->port_id,
3156                         fdir_filter->action.behavior);
3157                 rte_errno = ENOTSUP;
3158                 return -rte_errno;
3159         }
3160         attributes->queue.index = fdir_filter->action.rx_queue;
3161         /* Handle L3. */
3162         switch (fdir_filter->input.flow_type) {
3163         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3164         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3165         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3166                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3167                         .src_addr = input->flow.ip4_flow.src_ip,
3168                         .dst_addr = input->flow.ip4_flow.dst_ip,
3169                         .time_to_live = input->flow.ip4_flow.ttl,
3170                         .type_of_service = input->flow.ip4_flow.tos,
3171                         .next_proto_id = input->flow.ip4_flow.proto,
3172                 };
3173                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3174                         .src_addr = mask->ipv4_mask.src_ip,
3175                         .dst_addr = mask->ipv4_mask.dst_ip,
3176                         .time_to_live = mask->ipv4_mask.ttl,
3177                         .type_of_service = mask->ipv4_mask.tos,
3178                         .next_proto_id = mask->ipv4_mask.proto,
3179                 };
3180                 attributes->items[1] = (struct rte_flow_item){
3181                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3182                         .spec = &attributes->l3,
3183                         .mask = &attributes->l3_mask,
3184                 };
3185                 break;
3186         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3187         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3188         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3189                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3190                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3191                         .proto = input->flow.ipv6_flow.proto,
3192                 };
3193
3194                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3195                        input->flow.ipv6_flow.src_ip,
3196                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3197                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3198                        input->flow.ipv6_flow.dst_ip,
3199                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3200                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3201                        mask->ipv6_mask.src_ip,
3202                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3203                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3204                        mask->ipv6_mask.dst_ip,
3205                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3206                 attributes->items[1] = (struct rte_flow_item){
3207                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3208                         .spec = &attributes->l3,
3209                         .mask = &attributes->l3_mask,
3210                 };
3211                 break;
3212         default:
3213                 DRV_LOG(ERR, "port %u invalid flow type%d",
3214                         dev->data->port_id, fdir_filter->input.flow_type);
3215                 rte_errno = ENOTSUP;
3216                 return -rte_errno;
3217         }
3218         /* Handle L4. */
3219         switch (fdir_filter->input.flow_type) {
3220         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3221                 attributes->l4.udp.hdr = (struct udp_hdr){
3222                         .src_port = input->flow.udp4_flow.src_port,
3223                         .dst_port = input->flow.udp4_flow.dst_port,
3224                 };
3225                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3226                         .src_port = mask->src_port_mask,
3227                         .dst_port = mask->dst_port_mask,
3228                 };
3229                 attributes->items[2] = (struct rte_flow_item){
3230                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3231                         .spec = &attributes->l4,
3232                         .mask = &attributes->l4_mask,
3233                 };
3234                 break;
3235         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3236                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3237                         .src_port = input->flow.tcp4_flow.src_port,
3238                         .dst_port = input->flow.tcp4_flow.dst_port,
3239                 };
3240                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3241                         .src_port = mask->src_port_mask,
3242                         .dst_port = mask->dst_port_mask,
3243                 };
3244                 attributes->items[2] = (struct rte_flow_item){
3245                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3246                         .spec = &attributes->l4,
3247                         .mask = &attributes->l4_mask,
3248                 };
3249                 break;
3250         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3251                 attributes->l4.udp.hdr = (struct udp_hdr){
3252                         .src_port = input->flow.udp6_flow.src_port,
3253                         .dst_port = input->flow.udp6_flow.dst_port,
3254                 };
3255                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3256                         .src_port = mask->src_port_mask,
3257                         .dst_port = mask->dst_port_mask,
3258                 };
3259                 attributes->items[2] = (struct rte_flow_item){
3260                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3261                         .spec = &attributes->l4,
3262                         .mask = &attributes->l4_mask,
3263                 };
3264                 break;
3265         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3266                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3267                         .src_port = input->flow.tcp6_flow.src_port,
3268                         .dst_port = input->flow.tcp6_flow.dst_port,
3269                 };
3270                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3271                         .src_port = mask->src_port_mask,
3272                         .dst_port = mask->dst_port_mask,
3273                 };
3274                 attributes->items[2] = (struct rte_flow_item){
3275                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3276                         .spec = &attributes->l4,
3277                         .mask = &attributes->l4_mask,
3278                 };
3279                 break;
3280         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3281         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3282                 break;
3283         default:
3284                 DRV_LOG(ERR, "port %u invalid flow type%d",
3285                         dev->data->port_id, fdir_filter->input.flow_type);
3286                 rte_errno = ENOTSUP;
3287                 return -rte_errno;
3288         }
3289         return 0;
3290 }
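
/*
 * Example (sketch): a flow director filter that this helper would convert
 * into ETH / IPV4 / UDP items with a QUEUE action. All field values are
 * illustrative only.
 *
 *     struct rte_eth_fdir_filter f = {
 *             .input = {
 *                     .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                     .flow.udp4_flow = {
 *                             .ip.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *                             .dst_port = rte_cpu_to_be_16(4789),
 *                     },
 *             },
 *             .action = {
 *                     .behavior = RTE_ETH_FDIR_ACCEPT,
 *                     .rx_queue = 3,
 *             },
 *     };
 */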
3291
3292 /**
3293  * Add a new flow director filter and store it in the flow list.
3294  *
3295  * @param dev
3296  *   Pointer to Ethernet device.
3297  * @param fdir_filter
3298  *   Flow director filter to add.
3299  *
3300  * @return
3301  *   0 on success, a negative errno value otherwise and rte_errno is set.
3302  */
3303 static int
3304 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3305                      const struct rte_eth_fdir_filter *fdir_filter)
3306 {
3307         struct priv *priv = dev->data->dev_private;
3308         struct mlx5_fdir attributes = {
3309                 .attr.group = 0,
3310                 .l2_mask = {
3311                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3312                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3313                         .type = 0,
3314                 },
3315         };
3316         struct mlx5_flow_parse parser = {
3317                 .layer = HASH_RXQ_ETH,
3318         };
3319         struct rte_flow_error error;
3320         struct rte_flow *flow;
3321         int ret;
3322
3323         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3324         if (ret)
3325                 return ret;
3326         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3327                                 attributes.actions, &error, &parser);
3328         if (ret)
3329                 return ret;
3330         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3331                                      attributes.items, attributes.actions,
3332                                      &error);
3333         if (flow) {
3334                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3335                         (void *)flow);
3336                 return 0;
3337         }
3338         return -rte_errno;
3339 }
3340
3341 /**
3342  * Delete a specific filter.
3343  *
3344  * @param dev
3345  *   Pointer to Ethernet device.
3346  * @param fdir_filter
3347  *   Filter to be deleted.
3348  *
3349  * @return
3350  *   0 on success, a negative errno value otherwise and rte_errno is set.
3351  */
3352 static int
3353 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3354                         const struct rte_eth_fdir_filter *fdir_filter)
3355 {
3356         struct priv *priv = dev->data->dev_private;
3357         struct mlx5_fdir attributes = {
3358                 .attr.group = 0,
3359         };
3360         struct mlx5_flow_parse parser = {
3361                 .create = 1,
3362                 .layer = HASH_RXQ_ETH,
3363         };
3364         struct rte_flow_error error;
3365         struct rte_flow *flow;
3366         unsigned int i;
3367         int ret;
3368
3369         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3370         if (ret)
3371                 return ret;
3372         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3373                                 attributes.actions, &error, &parser);
3374         if (ret)
3375                 goto exit;
3376         /*
3377          * Special case for the drop action: its specification is only
3378          * appended when a flow is actually created, so it is missing
3379          * here and must be rebuilt before comparing against stored flows.
3380          */
3381         if (parser.drop) {
3382                 struct ibv_flow_spec_action_drop *drop;
3383
3384                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3385                                 parser.queue[HASH_RXQ_ETH].offset);
3386                 *drop = (struct ibv_flow_spec_action_drop){
3387                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3388                         .size = sizeof(struct ibv_flow_spec_action_drop),
3389                 };
3390                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3391         }
3392         TAILQ_FOREACH(flow, &priv->flows, next) {
3393                 struct ibv_flow_attr *attr;
3394                 struct ibv_spec_header *attr_h;
3395                 void *spec;
3396                 struct ibv_flow_attr *flow_attr;
3397                 struct ibv_spec_header *flow_h;
3398                 void *flow_spec;
3399                 unsigned int specs_n;
3400                 unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
3401                                                       parser.layer;
3402
3403                 attr = parser.queue[queue_id].ibv_attr;
3404                 flow_attr = flow->frxq[queue_id].ibv_attr;
3405                 /* Compare first the attributes. */
3406                 if (!flow_attr ||
3407                     memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3408                         continue;
3409                 if (attr->num_of_specs == 0)
3410                         continue;
3411                 spec = (void *)((uintptr_t)attr +
3412                                 sizeof(struct ibv_flow_attr));
3413                 flow_spec = (void *)((uintptr_t)flow_attr +
3414                                      sizeof(struct ibv_flow_attr));
3415                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3416                 for (i = 0; i != specs_n; ++i) {
3417                         attr_h = spec;
3418                         flow_h = flow_spec;
3419                         if (memcmp(spec, flow_spec,
3420                                    RTE_MIN(attr_h->size, flow_h->size)))
3421                                 goto wrong_flow;
3422                         spec = (void *)((uintptr_t)spec + attr_h->size);
3423                         flow_spec = (void *)((uintptr_t)flow_spec +
3424                                              flow_h->size);
3425                 }
3426                 /* At this point, the flows match. */
3427                 break;
3428 wrong_flow:
3429                 /* The flow does not match. */
3430                 continue;
3431         }
3432         ret = flow ? 0 : ENOENT; /* Succeed only when a flow matched. */
3433         if (flow)
3434                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3435 exit:
3436         for (i = 0; i != hash_rxq_init_n; ++i) {
3437                 if (parser.queue[i].ibv_attr)
3438                         rte_free(parser.queue[i].ibv_attr);
3439         }
3440         rte_errno = ret > 0 ? ret : -ret; /* Keep rte_errno positive. */
3441         return -rte_errno;
3442 }
3443
3444 /**
3445  * Update the queue for a specific filter.
3446  *
3447  * @param dev
3448  *   Pointer to Ethernet device.
3449  * @param fdir_filter
3450  *   Filter to be updated.
3451  *
3452  * @return
3453  *   0 on success, a negative errno value otherwise and rte_errno is set.
3454  */
3455 static int
3456 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3457                         const struct rte_eth_fdir_filter *fdir_filter)
3458 {
3459         int ret;
3460
3461         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3462         if (ret)
3463                 return ret;
3464         return mlx5_fdir_filter_add(dev, fdir_filter);
3465 }
3466
3467 /**
3468  * Flush all filters.
3469  *
3470  * @param dev
3471  *   Pointer to Ethernet device.
3472  */
3473 static void
3474 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3475 {
3476         struct priv *priv = dev->data->dev_private;
3477
3478         mlx5_flow_list_flush(dev, &priv->flows);
3479 }
3480
3481 /**
3482  * Get flow director information.
3483  *
3484  * @param dev
3485  *   Pointer to Ethernet device.
3486  * @param[out] fdir_info
3487  *   Resulting flow director information.
3488  */
3489 static void
3490 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3491 {
3492         struct rte_eth_fdir_masks *mask =
3493                 &dev->data->dev_conf.fdir_conf.mask;
3494
3495         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3496         fdir_info->guarant_spc = 0;
3497         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3498         fdir_info->max_flexpayload = 0;
3499         fdir_info->flow_types_mask[0] = 0;
3500         fdir_info->flex_payload_unit = 0;
3501         fdir_info->max_flex_payload_segment_num = 0;
3502         fdir_info->flex_payload_limit = 0;
3503         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3504 }
3505
3506 /**
3507  * Deal with flow director operations.
3508  *
3509  * @param dev
3510  *   Pointer to Ethernet device.
3511  * @param filter_op
3512  *   Operation to perform.
3513  * @param arg
3514  *   Pointer to operation-specific structure.
3515  *
3516  * @return
3517  *   0 on success, a negative errno value otherwise and rte_errno is set.
3518  */
3519 static int
3520 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3521                     void *arg)
3522 {
3523         enum rte_fdir_mode fdir_mode =
3524                 dev->data->dev_conf.fdir_conf.mode;
3525
3526         if (filter_op == RTE_ETH_FILTER_NOP)
3527                 return 0;
3528         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3529             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3530                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3531                         dev->data->port_id, fdir_mode);
3532                 rte_errno = EINVAL;
3533                 return -rte_errno;
3534         }
3535         switch (filter_op) {
3536         case RTE_ETH_FILTER_ADD:
3537                 return mlx5_fdir_filter_add(dev, arg);
3538         case RTE_ETH_FILTER_UPDATE:
3539                 return mlx5_fdir_filter_update(dev, arg);
3540         case RTE_ETH_FILTER_DELETE:
3541                 return mlx5_fdir_filter_delete(dev, arg);
3542         case RTE_ETH_FILTER_FLUSH:
3543                 mlx5_fdir_filter_flush(dev);
3544                 break;
3545         case RTE_ETH_FILTER_INFO:
3546                 mlx5_fdir_info_get(dev, arg);
3547                 break;
3548         default:
3549                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3550                         dev->data->port_id, filter_op);
3551                 rte_errno = EINVAL;
3552                 return -rte_errno;
3553         }
3554         return 0;
3555 }
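
/*
 * Usage sketch (application side, assumed caller): flow director requests
 * reach this dispatcher through the generic filter API, e.g. adding a
 * filter like the one sketched after mlx5_fdir_filter_convert() above.
 *
 *     ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                                   RTE_ETH_FILTER_ADD, &f);
 */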
3556
3557 /**
3558  * Manage filter operations.
3559  *
3560  * @param dev
3561  *   Pointer to Ethernet device structure.
3562  * @param filter_type
3563  *   Filter type.
3564  * @param filter_op
3565  *   Operation to perform.
3566  * @param arg
3567  *   Pointer to operation-specific structure.
3568  *
3569  * @return
3570  *   0 on success, a negative errno value otherwise and rte_errno is set.
3571  */
3572 int
3573 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3574                      enum rte_filter_type filter_type,
3575                      enum rte_filter_op filter_op,
3576                      void *arg)
3577 {
3578         switch (filter_type) {
3579         case RTE_ETH_FILTER_GENERIC:
3580                 if (filter_op != RTE_ETH_FILTER_GET) {
3581                         rte_errno = EINVAL;
3582                         return -rte_errno;
3583                 }
3584                 *(const void **)arg = &mlx5_flow_ops;
3585                 return 0;
3586         case RTE_ETH_FILTER_FDIR:
3587                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3588         default:
3589                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3590                         dev->data->port_id, filter_type);
3591                 rte_errno = ENOTSUP;
3592                 return -rte_errno;
3593         }
3594         return 0;
3595 }
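
/*
 * Usage sketch (generic layer): this is how the rte_flow API discovers the
 * driver's flow ops before calling them.
 *
 *     const struct rte_flow_ops *ops = NULL;
 *
 *     if (!rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                                  RTE_ETH_FILTER_GET, &ops))
 *             ... ops now points to mlx5_flow_ops ...
 */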
3596
3597 /**
3598  * Detect the number of supported Verbs flow priorities.
3599  *
3600  * @param dev
3601  *   Pointer to Ethernet device.
3602  *
3603  * @return
3604  *   Number of supported Verbs flow priorities.
3605  */
3606 unsigned int
3607 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3608 {
3609         struct priv *priv = dev->data->dev_private;
3610         unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3611         struct {
3612                 struct ibv_flow_attr attr;
3613                 struct ibv_flow_spec_eth eth;
3614                 struct ibv_flow_spec_action_drop drop;
3615         } flow_attr = {
3616                 .attr = {
3617                         .num_of_specs = 2,
3618                 },
3619                 .eth = {
3620                         .type = IBV_FLOW_SPEC_ETH,
3621                         .size = sizeof(struct ibv_flow_spec_eth),
3622                 },
3623                 .drop = {
3624                         .size = sizeof(struct ibv_flow_spec_action_drop),
3625                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3626                 },
3627         };
3628         struct ibv_flow *flow;
3629
3630         do {
3631                 flow_attr.attr.priority = verb_priorities - 1;
3632                 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3633                                               &flow_attr.attr);
3634                 if (flow) {
3635                         claim_zero(mlx5_glue->destroy_flow(flow));
3636                         /* Try more priorities. */
3637                         verb_priorities *= 2;
3638                 } else {
3639                         /* Creation failed: fall back to the last working value. */
3640                         verb_priorities /= 2;
3641                         break;
3642                 }
3643         } while (1);
3644         DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3645                 " user flow priorities: %d",
3646                 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3647         return verb_priorities;
3648 }
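
/*
 * Probe walk-through (illustrative numbers): starting from
 * MLX5_VERBS_FLOW_PRIO_8, each successful drop rule created at priority
 * N - 1 doubles N; the first failure halves it back and stops. E.g. on a
 * device with 16 priorities: 8 (prio 7 ok) -> 16 (prio 15 ok) ->
 * 32 (prio 31 fails) -> settle on 16.
 */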