net/mlx5: introduce VXLAN-GPE tunnel type
dpdk.git: drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36
37 /* Internet Protocol versions and the GRE protocol number. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44         int dummy;
45 };
46 #endif
47
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54         struct rte_eth_dev *dev; /** Ethernet device. */
55         struct mlx5_flow_parse *parser; /** Parser context. */
56         struct rte_flow_error *error; /** Error context. */
57 };
58
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61                      const void *default_mask,
62                      struct mlx5_flow_data *data);
63
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66                       const void *default_mask,
67                       struct mlx5_flow_data *data);
68
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71                       const void *default_mask,
72                       struct mlx5_flow_data *data);
73
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76                       const void *default_mask,
77                       struct mlx5_flow_data *data);
78
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81                      const void *default_mask,
82                      struct mlx5_flow_data *data);
83
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86                      const void *default_mask,
87                      struct mlx5_flow_data *data);
88
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91                        const void *default_mask,
92                        struct mlx5_flow_data *data);
93
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96                            const void *default_mask,
97                            struct mlx5_flow_data *data);
98
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101                      const void *default_mask,
102                      struct mlx5_flow_data *data);
103
104 struct mlx5_flow_parse;
105
106 static void
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
108                       unsigned int size);
109
110 static int
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
112
113 static int
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
115
116 /* Hash RX queue types. */
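/*
 * The order below matters: L4 hash types come before their L3 counterpart
 * and the catch-all Ethernet type comes after them; mlx5_flow_convert_rss()
 * relies on this ordering when it expands or trims the per-hash-type Verbs
 * flows. HASH_RXQ_TUNNEL has no entry in hash_rxq_init[] and is only used
 * as a parser marker for the outer layer of tunnelled patterns.
 */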
117 enum hash_rxq_type {
118         HASH_RXQ_TCPV4,
119         HASH_RXQ_UDPV4,
120         HASH_RXQ_IPV4,
121         HASH_RXQ_TCPV6,
122         HASH_RXQ_UDPV6,
123         HASH_RXQ_IPV6,
124         HASH_RXQ_ETH,
125         HASH_RXQ_TUNNEL,
126 };
127
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130         uint64_t hash_fields; /* Fields that participate in the hash. */
131         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132         unsigned int flow_priority; /* Flow priority to use. */
133         unsigned int ip_version; /* Internet protocol. */
134 };
135
136 /* Initialization data for hash RX queues. */
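/*
 * flow_priority spreads the Verbs flows of a single rule so that the most
 * specific match wins: L3+L4 hash types use 0, L3-only types use 1 and the
 * catch-all Ethernet type uses 2, all relative to the base priority
 * computed by mlx5_flow_update_priority().
 */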
137 const struct hash_rxq_init hash_rxq_init[] = {
138         [HASH_RXQ_TCPV4] = {
139                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140                                 IBV_RX_HASH_DST_IPV4 |
141                                 IBV_RX_HASH_SRC_PORT_TCP |
142                                 IBV_RX_HASH_DST_PORT_TCP),
143                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
144                 .flow_priority = 0,
145                 .ip_version = MLX5_IPV4,
146         },
147         [HASH_RXQ_UDPV4] = {
148                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149                                 IBV_RX_HASH_DST_IPV4 |
150                                 IBV_RX_HASH_SRC_PORT_UDP |
151                                 IBV_RX_HASH_DST_PORT_UDP),
152                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
153                 .flow_priority = 0,
154                 .ip_version = MLX5_IPV4,
155         },
156         [HASH_RXQ_IPV4] = {
157                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158                                 IBV_RX_HASH_DST_IPV4),
159                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
160                                 ETH_RSS_FRAG_IPV4),
161                 .flow_priority = 1,
162                 .ip_version = MLX5_IPV4,
163         },
164         [HASH_RXQ_TCPV6] = {
165                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166                                 IBV_RX_HASH_DST_IPV6 |
167                                 IBV_RX_HASH_SRC_PORT_TCP |
168                                 IBV_RX_HASH_DST_PORT_TCP),
169                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
170                 .flow_priority = 0,
171                 .ip_version = MLX5_IPV6,
172         },
173         [HASH_RXQ_UDPV6] = {
174                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175                                 IBV_RX_HASH_DST_IPV6 |
176                                 IBV_RX_HASH_SRC_PORT_UDP |
177                                 IBV_RX_HASH_DST_PORT_UDP),
178                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
179                 .flow_priority = 0,
180                 .ip_version = MLX5_IPV6,
181         },
182         [HASH_RXQ_IPV6] = {
183                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184                                 IBV_RX_HASH_DST_IPV6),
185                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
186                                 ETH_RSS_FRAG_IPV6),
187                 .flow_priority = 1,
188                 .ip_version = MLX5_IPV6,
189         },
190         [HASH_RXQ_ETH] = {
191                 .hash_fields = 0,
192                 .dpdk_rss_hf = 0,
193                 .flow_priority = 2,
194         },
195 };
196
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
199
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202         uint64_t hits; /**< Number of packets matched by the rule. */
203         uint64_t bytes; /**< Number of bytes matched by the rule. */
204 };
205
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209         struct ibv_qp *qp; /**< Verbs queue pair. */
210         struct ibv_wq *wq; /**< Verbs work queue. */
211         struct ibv_cq *cq; /**< Verbs completion queue. */
212 };
213
214 /* Flow structures. */
215 struct mlx5_flow {
216         uint64_t hash_fields; /**< Fields that participate in the hash. */
217         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218         struct ibv_flow *ibv_flow; /**< Verbs flow. */
219         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
220 };
221
222 /* Drop flow structures. */
223 struct mlx5_flow_drop {
224         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
225         struct ibv_flow *ibv_flow; /**< Verbs flow. */
226 };
227
228 struct rte_flow {
229         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230         uint32_t mark:1; /**< Set if the flow is marked. */
231         uint32_t drop:1; /**< Drop queue. */
232         struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
233         uint16_t (*queues)[]; /**< Queue indexes to use. */
234         uint8_t rss_key[40]; /**< Copy of the RSS key. */
235         uint32_t tunnel; /**< Tunnel type, one of RTE_PTYPE_TUNNEL_*. */
236         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
237         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239         /**< Flow with Rx queue. */
240 };
241
242 /** Static initializer for items. */
243 #define ITEMS(...) \
244         (const enum rte_flow_item_type []){ \
245                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
246         }
247
248 #define IS_TUNNEL(type) ( \
249         (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250         (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251         (type) == RTE_FLOW_ITEM_TYPE_GRE)
252
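/*
 * Map tunnel pattern item types to the RTE_PTYPE_TUNNEL_* flag used to
 * remember which encapsulation a flow rule matches on.
 */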
253 const uint32_t flow_ptype[] = {
254         [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255         [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256         [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
257 };
258
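/*
 * RTE_PTYPE_TUNNEL_MASK covers bits 12-15 of the packet type, so shifting
 * right by 12 turns a tunnel ptype into a small index usable for
 * ptype_ext[] below.
 */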
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
260
261 const uint32_t ptype_ext[] = {
262         [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
263                                               RTE_PTYPE_L4_UDP,
264         [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
265                                                   RTE_PTYPE_L4_UDP,
266         [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
267 };
268
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271         /** List of possible actions for these items. */
272         const enum rte_flow_action_type *const actions;
273         /** Bit-masks corresponding to the possibilities for the item. */
274         const void *mask;
275         /**
276          * Default bit-masks to use when item->mask is not provided. When
277          * \default_mask is also NULL, the full supported bit-mask (\mask) is
278          * used instead.
279          */
280         const void *default_mask;
281         /** Bit-masks size in bytes. */
282         const unsigned int mask_sz;
283         /**
284          * Conversion function from rte_flow to NIC specific flow.
285          *
286          * @param item
287          *   rte_flow item to convert.
288          * @param default_mask
289          *   Default bit-masks to use when item->mask is not provided.
290          * @param data
291          *   Internal structure to store the conversion.
292          *
293          * @return
294          *   0 on success, a negative errno value otherwise and rte_errno is
295          *   set.
296          */
297         int (*convert)(const struct rte_flow_item *item,
298                        const void *default_mask,
299                        struct mlx5_flow_data *data);
300         /** Size in bytes of the destination structure. */
301         const unsigned int dst_sz;
302         /** List of possible following items.  */
303         const enum rte_flow_item_type *const items;
304 };
305
306 /** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308         RTE_FLOW_ACTION_TYPE_DROP,
309         RTE_FLOW_ACTION_TYPE_QUEUE,
310         RTE_FLOW_ACTION_TYPE_MARK,
311         RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313         RTE_FLOW_ACTION_TYPE_COUNT,
314 #endif
315         RTE_FLOW_ACTION_TYPE_END,
316 };
317
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320         [RTE_FLOW_ITEM_TYPE_END] = {
321                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322                                RTE_FLOW_ITEM_TYPE_VXLAN,
323                                RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324                                RTE_FLOW_ITEM_TYPE_GRE),
325         },
326         [RTE_FLOW_ITEM_TYPE_ETH] = {
327                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328                                RTE_FLOW_ITEM_TYPE_IPV4,
329                                RTE_FLOW_ITEM_TYPE_IPV6),
330                 .actions = valid_actions,
331                 .mask = &(const struct rte_flow_item_eth){
332                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
334                         .type = -1,
335                 },
336                 .default_mask = &rte_flow_item_eth_mask,
337                 .mask_sz = sizeof(struct rte_flow_item_eth),
338                 .convert = mlx5_flow_create_eth,
339                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
340         },
341         [RTE_FLOW_ITEM_TYPE_VLAN] = {
342                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343                                RTE_FLOW_ITEM_TYPE_IPV6),
344                 .actions = valid_actions,
345                 .mask = &(const struct rte_flow_item_vlan){
346                         .tci = -1,
347                         .inner_type = -1,
348                 },
349                 .default_mask = &rte_flow_item_vlan_mask,
350                 .mask_sz = sizeof(struct rte_flow_item_vlan),
351                 .convert = mlx5_flow_create_vlan,
352                 .dst_sz = 0,
353         },
354         [RTE_FLOW_ITEM_TYPE_IPV4] = {
355                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356                                RTE_FLOW_ITEM_TYPE_TCP,
357                                RTE_FLOW_ITEM_TYPE_GRE),
358                 .actions = valid_actions,
359                 .mask = &(const struct rte_flow_item_ipv4){
360                         .hdr = {
361                                 .src_addr = -1,
362                                 .dst_addr = -1,
363                                 .type_of_service = -1,
364                                 .next_proto_id = -1,
365                         },
366                 },
367                 .default_mask = &rte_flow_item_ipv4_mask,
368                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
369                 .convert = mlx5_flow_create_ipv4,
370                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
371         },
372         [RTE_FLOW_ITEM_TYPE_IPV6] = {
373                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374                                RTE_FLOW_ITEM_TYPE_TCP,
375                                RTE_FLOW_ITEM_TYPE_GRE),
376                 .actions = valid_actions,
377                 .mask = &(const struct rte_flow_item_ipv6){
378                         .hdr = {
379                                 .src_addr = {
380                                         0xff, 0xff, 0xff, 0xff,
381                                         0xff, 0xff, 0xff, 0xff,
382                                         0xff, 0xff, 0xff, 0xff,
383                                         0xff, 0xff, 0xff, 0xff,
384                                 },
385                                 .dst_addr = {
386                                         0xff, 0xff, 0xff, 0xff,
387                                         0xff, 0xff, 0xff, 0xff,
388                                         0xff, 0xff, 0xff, 0xff,
389                                         0xff, 0xff, 0xff, 0xff,
390                                 },
391                                 .vtc_flow = -1,
392                                 .proto = -1,
393                                 .hop_limits = -1,
394                         },
395                 },
396                 .default_mask = &rte_flow_item_ipv6_mask,
397                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
398                 .convert = mlx5_flow_create_ipv6,
399                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
400         },
401         [RTE_FLOW_ITEM_TYPE_UDP] = {
402                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403                                RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404                 .actions = valid_actions,
405                 .mask = &(const struct rte_flow_item_udp){
406                         .hdr = {
407                                 .src_port = -1,
408                                 .dst_port = -1,
409                         },
410                 },
411                 .default_mask = &rte_flow_item_udp_mask,
412                 .mask_sz = sizeof(struct rte_flow_item_udp),
413                 .convert = mlx5_flow_create_udp,
414                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415         },
416         [RTE_FLOW_ITEM_TYPE_TCP] = {
417                 .actions = valid_actions,
418                 .mask = &(const struct rte_flow_item_tcp){
419                         .hdr = {
420                                 .src_port = -1,
421                                 .dst_port = -1,
422                         },
423                 },
424                 .default_mask = &rte_flow_item_tcp_mask,
425                 .mask_sz = sizeof(struct rte_flow_item_tcp),
426                 .convert = mlx5_flow_create_tcp,
427                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428         },
429         [RTE_FLOW_ITEM_TYPE_GRE] = {
430                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431                                RTE_FLOW_ITEM_TYPE_IPV4,
432                                RTE_FLOW_ITEM_TYPE_IPV6),
433                 .actions = valid_actions,
434                 .mask = &(const struct rte_flow_item_gre){
435                         .protocol = -1,
436                 },
437                 .default_mask = &rte_flow_item_gre_mask,
438                 .mask_sz = sizeof(struct rte_flow_item_gre),
439                 .convert = mlx5_flow_create_gre,
440                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
441         },
442         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
443                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444                                RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445                                RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446                 .actions = valid_actions,
447                 .mask = &(const struct rte_flow_item_vxlan){
448                         .vni = "\xff\xff\xff",
449                 },
450                 .default_mask = &rte_flow_item_vxlan_mask,
451                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
452                 .convert = mlx5_flow_create_vxlan,
453                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
454         },
455         [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457                                RTE_FLOW_ITEM_TYPE_IPV4,
458                                RTE_FLOW_ITEM_TYPE_IPV6),
459                 .actions = valid_actions,
460                 .mask = &(const struct rte_flow_item_vxlan_gpe){
461                         .vni = "\xff\xff\xff",
462                 },
463                 .default_mask = &rte_flow_item_vxlan_gpe_mask,
464                 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465                 .convert = mlx5_flow_create_vxlan_gpe,
466                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
467         },
468 };
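/*
 * The array above forms a graph: validation starts at the
 * RTE_FLOW_ITEM_TYPE_END entry and each item lists the item types allowed
 * to follow it. For instance the pattern eth / ipv4 / udp / vxlan / eth /
 * ipv4 is accepted because every step appears in the .items list of the
 * previous one.
 */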
469
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472         uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
473         uint32_t create:1;
474         /**< Whether resources should remain after a validate. */
475         uint32_t drop:1; /**< Target is a drop queue. */
476         uint32_t mark:1; /**< Mark is present in the flow. */
477         uint32_t count:1; /**< Count is present in the flow. */
478         uint32_t mark_id; /**< Mark identifier. */
479         struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
480         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
481         uint8_t rss_key[40]; /**< Copy of the RSS key. */
482         enum hash_rxq_type layer; /**< Last pattern layer detected. */
483         enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484         uint32_t tunnel; /**< Tunnel type, one of RTE_PTYPE_TUNNEL_*. */
485         struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
486         struct {
487                 struct ibv_flow_attr *ibv_attr;
488                 /**< Pointer to Verbs attributes. */
489                 unsigned int offset;
490                 /**< Current position or total size of the attribute. */
491                 uint64_t hash_fields; /**< Verbs hash fields. */
492         } queue[RTE_DIM(hash_rxq_init)];
493 };
494
495 static const struct rte_flow_ops mlx5_flow_ops = {
496         .validate = mlx5_flow_validate,
497         .create = mlx5_flow_create,
498         .destroy = mlx5_flow_destroy,
499         .flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
501         .query = mlx5_flow_query,
502 #else
503         .query = NULL,
504 #endif
505         .isolate = mlx5_flow_isolate,
506 };
507
508 /* Convert FDIR request to Generic flow. */
509 struct mlx5_fdir {
510         struct rte_flow_attr attr;
511         struct rte_flow_action actions[2];
512         struct rte_flow_item items[4];
513         struct rte_flow_item_eth l2;
514         struct rte_flow_item_eth l2_mask;
515         union {
516                 struct rte_flow_item_ipv4 ipv4;
517                 struct rte_flow_item_ipv6 ipv6;
518         } l3;
519         union {
520                 struct rte_flow_item_ipv4 ipv4;
521                 struct rte_flow_item_ipv6 ipv6;
522         } l3_mask;
523         union {
524                 struct rte_flow_item_udp udp;
525                 struct rte_flow_item_tcp tcp;
526         } l4;
527         union {
528                 struct rte_flow_item_udp udp;
529                 struct rte_flow_item_tcp tcp;
530         } l4_mask;
531         struct rte_flow_action_queue queue;
532 };
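/*
 * A single mlx5_fdir allocation carries everything needed to express a
 * legacy flow director filter as a generic rte_flow rule: the attributes,
 * up to four pattern items (Ethernet, L3, L4, END) with their masks and a
 * terminal queue action.
 */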
533
534 /* Verbs specification header. */
535 struct ibv_spec_header {
536         enum ibv_flow_spec_type type;
537         uint16_t size;
538 };
539
540 /**
541  * Check support for a given item.
542  *
543  * @param item[in]
544  *   Item specification.
545  * @param mask[in]
546  *   Bit-masks covering supported fields to compare with spec, last and mask in
547  *   \item.
548  * @param size
549  *   Bit-Mask size in bytes.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556                         const uint8_t *mask, unsigned int size)
557 {
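        /*
         * Accept the item only when spec and last do not set bits outside
         * the supported mask, i.e. (value[i] | mask[i]) == mask[i] for each
         * byte, and when spec and last, once the applied mask is taken into
         * account, are identical (ranges are not supported).
         */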
558         if (!item->spec && (item->mask || item->last)) {
559                 rte_errno = EINVAL;
560                 return -rte_errno;
561         }
562         if (item->spec && !item->mask) {
563                 unsigned int i;
564                 const uint8_t *spec = item->spec;
565
566                 for (i = 0; i < size; ++i)
567                         if ((spec[i] | mask[i]) != mask[i]) {
568                                 rte_errno = EINVAL;
569                                 return -rte_errno;
570                         }
571         }
572         if (item->last && !item->mask) {
573                 unsigned int i;
574                 const uint8_t *spec = item->last;
575
576                 for (i = 0; i < size; ++i)
577                         if ((spec[i] | mask[i]) != mask[i]) {
578                                 rte_errno = EINVAL;
579                                 return -rte_errno;
580                         }
581         }
582         if (item->mask) {
583                 unsigned int i;
584                 const uint8_t *spec = item->spec;
585
586                 for (i = 0; i < size; ++i)
587                         if ((spec[i] | mask[i]) != mask[i]) {
588                                 rte_errno = EINVAL;
589                                 return -rte_errno;
590                         }
591         }
592         if (item->spec && item->last) {
593                 uint8_t spec[size];
594                 uint8_t last[size];
595                 const uint8_t *apply = mask;
596                 unsigned int i;
597                 int ret;
598
599                 if (item->mask)
600                         apply = item->mask;
601                 for (i = 0; i < size; ++i) {
602                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
603                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
604                 }
605                 ret = memcmp(spec, last, size);
606                 if (ret != 0) {
607                         rte_errno = EINVAL;
608                         return -rte_errno;
609                 }
610         }
611         return 0;
612 }
613
614 /**
615  * Validate flow rule attributes.
616  *
617  * @param[in] attr
618  *   Flow rule attributes.
619  * @param[out] error
620  *   Perform verbose error reporting if not NULL.
621  *
622  * @return
623  *   0 on success, a negative errno value otherwise and rte_errno is set.
624  */
625 static int
626 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
627                              struct rte_flow_error *error)
628 {
629         if (attr->group) {
630                 rte_flow_error_set(error, ENOTSUP,
631                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
632                                    NULL,
633                                    "groups are not supported");
634                 return -rte_errno;
635         }
636         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
637                 rte_flow_error_set(error, ENOTSUP,
638                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
639                                    NULL,
640                                    "priorities are not supported");
641                 return -rte_errno;
642         }
643         if (attr->egress) {
644                 rte_flow_error_set(error, ENOTSUP,
645                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
646                                    NULL,
647                                    "egress is not supported");
648                 return -rte_errno;
649         }
650         if (attr->transfer) {
651                 rte_flow_error_set(error, ENOTSUP,
652                                    RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
653                                    NULL,
654                                    "transfer is not supported");
655                 return -rte_errno;
656         }
657         if (!attr->ingress) {
658                 rte_flow_error_set(error, ENOTSUP,
659                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
660                                    NULL,
661                                    "only ingress is supported");
662                 return -rte_errno;
663         }
664         return 0;
665 }
666
667 /**
668  * Extract the requested actions into the parser.
669  *
670  * @param dev
671  *   Pointer to Ethernet device.
672  * @param[in] actions
673  *   Associated actions (list terminated by the END action).
674  * @param[out] error
675  *   Perform verbose error reporting if not NULL.
676  * @param[in, out] parser
677  *   Internal parser structure.
678  *
679  * @return
680  *   0 on success, a negative errno value otherwise and rte_errno is set.
681  */
682 static int
683 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
684                           const struct rte_flow_action actions[],
685                           struct rte_flow_error *error,
686                           struct mlx5_flow_parse *parser)
687 {
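        /*
         * Bit flags used to detect duplicated or conflicting actions: a rule
         * may carry at most one fate action (drop, queue or RSS), one
         * mark/flag action and one count action.
         */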
688         enum { FATE = 1, MARK = 2, COUNT = 4, };
689         uint32_t overlap = 0;
690         struct priv *priv = dev->data->dev_private;
691
692         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
693                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
694                         continue;
695                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
696                         if (overlap & FATE)
697                                 goto exit_action_overlap;
698                         overlap |= FATE;
699                         parser->drop = 1;
700                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
701                         const struct rte_flow_action_queue *queue =
702                                 (const struct rte_flow_action_queue *)
703                                 actions->conf;
704
705                         if (overlap & FATE)
706                                 goto exit_action_overlap;
707                         overlap |= FATE;
708                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
709                                 goto exit_action_not_supported;
710                         parser->queues[0] = queue->index;
711                         parser->rss_conf = (struct rte_flow_action_rss){
712                                 .queue_num = 1,
713                                 .queue = parser->queues,
714                         };
715                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
716                         const struct rte_flow_action_rss *rss =
717                                 (const struct rte_flow_action_rss *)
718                                 actions->conf;
719                         const uint8_t *rss_key;
720                         uint32_t rss_key_len;
721                         uint16_t n;
722
723                         if (overlap & FATE)
724                                 goto exit_action_overlap;
725                         overlap |= FATE;
726                         if (rss->func &&
727                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
728                                 rte_flow_error_set(error, EINVAL,
729                                                    RTE_FLOW_ERROR_TYPE_ACTION,
730                                                    actions,
731                                                    "the only supported RSS hash"
732                                                    " function is Toeplitz");
733                                 return -rte_errno;
734                         }
735 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
736                         if (parser->rss_conf.level > 1) {
737                                 rte_flow_error_set(error, EINVAL,
738                                                    RTE_FLOW_ERROR_TYPE_ACTION,
739                                                    actions,
740                                                    "a nonzero RSS encapsulation"
741                                                    " level is not supported");
742                                 return -rte_errno;
743                         }
744 #endif
745                         if (parser->rss_conf.level > 2) {
746                                 rte_flow_error_set(error, EINVAL,
747                                                    RTE_FLOW_ERROR_TYPE_ACTION,
748                                                    actions,
749                                                    "RSS encapsulation level"
750                                                    " > 1 is not supported");
751                                 return -rte_errno;
752                         }
753                         if (rss->types & MLX5_RSS_HF_MASK) {
754                                 rte_flow_error_set(error, EINVAL,
755                                                    RTE_FLOW_ERROR_TYPE_ACTION,
756                                                    actions,
757                                                    "unsupported RSS type"
758                                                    " requested");
759                                 return -rte_errno;
760                         }
761                         if (rss->key_len) {
762                                 rss_key_len = rss->key_len;
763                                 rss_key = rss->key;
764                         } else {
765                                 rss_key_len = rss_hash_default_key_len;
766                                 rss_key = rss_hash_default_key;
767                         }
768                         if (rss_key_len != RTE_DIM(parser->rss_key)) {
769                                 rte_flow_error_set(error, EINVAL,
770                                                    RTE_FLOW_ERROR_TYPE_ACTION,
771                                                    actions,
772                                                    "RSS hash key must be"
773                                                    " exactly 40 bytes long");
774                                 return -rte_errno;
775                         }
776                         if (!rss->queue_num) {
777                                 rte_flow_error_set(error, EINVAL,
778                                                    RTE_FLOW_ERROR_TYPE_ACTION,
779                                                    actions,
780                                                    "no valid queues");
781                                 return -rte_errno;
782                         }
783                         if (rss->queue_num > RTE_DIM(parser->queues)) {
784                                 rte_flow_error_set(error, EINVAL,
785                                                    RTE_FLOW_ERROR_TYPE_ACTION,
786                                                    actions,
787                                                    "too many queues for RSS"
788                                                    " context");
789                                 return -rte_errno;
790                         }
791                         for (n = 0; n < rss->queue_num; ++n) {
792                                 if (rss->queue[n] >= priv->rxqs_n) {
793                                         rte_flow_error_set(error, EINVAL,
794                                                    RTE_FLOW_ERROR_TYPE_ACTION,
795                                                    actions,
796                                                    "queue id > number of"
797                                                    " queues");
798                                         return -rte_errno;
799                                 }
800                         }
801                         parser->rss_conf = (struct rte_flow_action_rss){
802                                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
803                                 .level = rss->level,
804                                 .types = rss->types,
805                                 .key_len = rss_key_len,
806                                 .queue_num = rss->queue_num,
807                                 .key = memcpy(parser->rss_key, rss_key,
808                                               sizeof(*rss_key) * rss_key_len),
809                                 .queue = memcpy(parser->queues, rss->queue,
810                                                 sizeof(*rss->queue) *
811                                                 rss->queue_num),
812                         };
813                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
814                         const struct rte_flow_action_mark *mark =
815                                 (const struct rte_flow_action_mark *)
816                                 actions->conf;
817
818                         if (overlap & MARK)
819                                 goto exit_action_overlap;
820                         overlap |= MARK;
821                         if (!mark) {
822                                 rte_flow_error_set(error, EINVAL,
823                                                    RTE_FLOW_ERROR_TYPE_ACTION,
824                                                    actions,
825                                                    "mark must be defined");
826                                 return -rte_errno;
827                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
828                                 rte_flow_error_set(error, ENOTSUP,
829                                                    RTE_FLOW_ERROR_TYPE_ACTION,
830                                                    actions,
831                                                    "mark must be between 0"
832                                                    " and 16777199");
833                                 return -rte_errno;
834                         }
835                         parser->mark = 1;
836                         parser->mark_id = mark->id;
837                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
838                         if (overlap & MARK)
839                                 goto exit_action_overlap;
840                         overlap |= MARK;
841                         parser->mark = 1;
842                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
843                            priv->config.flow_counter_en) {
844                         if (overlap & COUNT)
845                                 goto exit_action_overlap;
846                         overlap |= COUNT;
847                         parser->count = 1;
848                 } else {
849                         goto exit_action_not_supported;
850                 }
851         }
852         /* When fate is unknown, drop traffic. */
853         if (!(overlap & FATE))
854                 parser->drop = 1;
855         if (parser->drop && parser->mark)
856                 parser->mark = 0;
857         if (!parser->rss_conf.queue_num && !parser->drop) {
858                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
859                                    NULL, "no valid action");
860                 return -rte_errno;
861         }
862         return 0;
863 exit_action_not_supported:
864         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
865                            actions, "action not supported");
866         return -rte_errno;
867 exit_action_overlap:
868         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
869                            actions, "overlapping actions are not supported");
870         return -rte_errno;
871 }
872
873 /**
874  * Validate items.
875  *
876  * @param dev
 *   Pointer to Ethernet device.
 * @param[in] items
877  *   Pattern specification (list terminated by the END pattern item).
878  * @param[out] error
879  *   Perform verbose error reporting if not NULL.
880  * @param[in, out] parser
881  *   Internal parser structure.
882  *
883  * @return
884  *   0 on success, a negative errno value otherwise and rte_errno is set.
885  */
886 static int
887 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
888                                  const struct rte_flow_item items[],
889                                  struct rte_flow_error *error,
890                                  struct mlx5_flow_parse *parser)
891 {
892         struct priv *priv = dev->data->dev_private;
893         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
894         unsigned int i;
895         int ret = 0;
896
897         /* Initialise the offsets to start after the verbs attribute. */
898         for (i = 0; i != hash_rxq_init_n; ++i)
899                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
900         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
901                 const struct mlx5_flow_items *token = NULL;
902                 unsigned int n;
903
904                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
905                         continue;
906                 for (i = 0;
907                      cur_item->items &&
908                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
909                      ++i) {
910                         if (cur_item->items[i] == items->type) {
911                                 token = &mlx5_flow_items[items->type];
912                                 break;
913                         }
914                 }
915                 if (!token) {
916                         ret = -ENOTSUP;
917                         goto exit_item_not_supported;
918                 }
919                 cur_item = token;
920                 ret = mlx5_flow_item_validate(items,
921                                               (const uint8_t *)cur_item->mask,
922                                               cur_item->mask_sz);
923                 if (ret)
924                         goto exit_item_not_supported;
925                 if (IS_TUNNEL(items->type)) {
926                         if (parser->tunnel) {
927                                 rte_flow_error_set(error, ENOTSUP,
928                                                    RTE_FLOW_ERROR_TYPE_ITEM,
929                                                    items,
930                                                    "Cannot recognize multiple"
931                                                    " tunnel encapsulations.");
932                                 return -rte_errno;
933                         }
934                         if (!priv->config.tunnel_en &&
935                             parser->rss_conf.level > 1) {
936                                 rte_flow_error_set(error, ENOTSUP,
937                                         RTE_FLOW_ERROR_TYPE_ITEM,
938                                         items,
939                                         "RSS on tunnel is not supported");
940                                 return -rte_errno;
941                         }
942                         parser->inner = IBV_FLOW_SPEC_INNER;
943                         parser->tunnel = flow_ptype[items->type];
944                 }
945                 if (parser->drop) {
946                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
947                 } else {
948                         for (n = 0; n != hash_rxq_init_n; ++n)
949                                 parser->queue[n].offset += cur_item->dst_sz;
950                 }
951         }
952         if (parser->drop) {
953                 parser->queue[HASH_RXQ_ETH].offset +=
954                         sizeof(struct ibv_flow_spec_action_drop);
955         }
956         if (parser->mark) {
957                 for (i = 0; i != hash_rxq_init_n; ++i)
958                         parser->queue[i].offset +=
959                                 sizeof(struct ibv_flow_spec_action_tag);
960         }
961         if (parser->count) {
962                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
963
964                 for (i = 0; i != hash_rxq_init_n; ++i)
965                         parser->queue[i].offset += size;
966         }
967         return 0;
968 exit_item_not_supported:
969         return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
970                                   items, "item not supported");
971 }
972
973 /**
974  * Allocate memory space to store verbs flow attributes.
975  *
976  * @param[in] size
977  *   Number of bytes to allocate.
978  * @param[out] error
979  *   Perform verbose error reporting if not NULL.
980  *
981  * @return
982  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
983  */
984 static struct ibv_flow_attr *
985 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
986 {
987         struct ibv_flow_attr *ibv_attr;
988
989         ibv_attr = rte_calloc(__func__, 1, size, 0);
990         if (!ibv_attr) {
991                 rte_flow_error_set(error, ENOMEM,
992                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
993                                    NULL,
994                                    "cannot allocate verbs spec attributes");
995                 return NULL;
996         }
997         return ibv_attr;
998 }
999
1000 /**
1001  * Give inner packet matching a higher priority than non-inner (outer)
1002  * matching.
1003  *
1004  * @param dev
1005  *   Pointer to Ethernet device.
1006  * @param[in, out] parser
1007  *   Internal parser structure.
1008  * @param attr
1009  *   User flow attribute.
1010  */
1011 static void
1012 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1013                           struct mlx5_flow_parse *parser,
1014                           const struct rte_flow_attr *attr)
1015 {
1016         struct priv *priv = dev->data->dev_private;
1017         unsigned int i;
1018         uint16_t priority;
1019
1020         /*                      8 priorities    >= 16 priorities
1021          * Control flow:        4-7             8-15
1022          * User normal flow:    1-3             4-7
1023          * User tunnel flow:    0-2             0-3
1024          */
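        /*
         * For instance with 16 Verbs priorities, a non-tunnel rule created
         * with attr->priority == 0 ends up at Verbs priority
         * 4 + hash_rxq_init[i].flow_priority, inside the 4-7 band reserved
         * above for user normal flows.
         */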
1025         priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1026         if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1027                 priority /= 2;
1028         /*
1029  * Lower the priority of non-tunnel flows by 1 when only 8 Verbs
1030  * priorities are supported, by 4 otherwise.
1031          */
1032         if (!parser->inner) {
1033                 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1034                         priority += 1;
1035                 else
1036                         priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1037         }
1038         if (parser->drop) {
1039                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1040                                 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1041                 return;
1042         }
1043         for (i = 0; i != hash_rxq_init_n; ++i) {
1044                 if (!parser->queue[i].ibv_attr)
1045                         continue;
1046                 parser->queue[i].ibv_attr->priority = priority +
1047                                 hash_rxq_init[i].flow_priority;
1048         }
1049 }
1050
1051 /**
1052  * Finalise verbs flow attributes.
1053  *
1054  * @param[in, out] parser
1055  *   Internal parser structure.
1056  */
1057 static void
1058 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1059 {
1060         unsigned int i;
1061         uint32_t inner = parser->inner;
1062
1063         /* Don't create extra flows for outer RSS. */
1064         if (parser->tunnel && parser->rss_conf.level < 2)
1065                 return;
1066         /*
1067          * Fill missing layers in verbs specifications, or compute the correct
1068          * offset to allocate the memory space for the attributes and
1069          * specifications.
1070          */
1071         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1072                 union {
1073                         struct ibv_flow_spec_ipv4_ext ipv4;
1074                         struct ibv_flow_spec_ipv6 ipv6;
1075                         struct ibv_flow_spec_tcp_udp udp_tcp;
1076                         struct ibv_flow_spec_eth eth;
1077                 } specs;
1078                 void *dst;
1079                 uint16_t size;
1080
1081                 if (i == parser->layer)
1082                         continue;
1083                 if (parser->layer == HASH_RXQ_ETH ||
1084                     parser->layer == HASH_RXQ_TUNNEL) {
1085                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1086                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1087                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1088                                         .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1089                                         .size = size,
1090                                 };
1091                         } else {
1092                                 size = sizeof(struct ibv_flow_spec_ipv6);
1093                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1094                                         .type = inner | IBV_FLOW_SPEC_IPV6,
1095                                         .size = size,
1096                                 };
1097                         }
1098                         if (parser->queue[i].ibv_attr) {
1099                                 dst = (void *)((uintptr_t)
1100                                                parser->queue[i].ibv_attr +
1101                                                parser->queue[i].offset);
1102                                 memcpy(dst, &specs, size);
1103                                 ++parser->queue[i].ibv_attr->num_of_specs;
1104                         }
1105                         parser->queue[i].offset += size;
1106                 }
1107                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1108                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1109                         size = sizeof(struct ibv_flow_spec_tcp_udp);
1110                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1111                                 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1112                                           i == HASH_RXQ_UDPV6) ?
1113                                          IBV_FLOW_SPEC_UDP :
1114                                          IBV_FLOW_SPEC_TCP),
1115                                 .size = size,
1116                         };
1117                         if (parser->queue[i].ibv_attr) {
1118                                 dst = (void *)((uintptr_t)
1119                                                parser->queue[i].ibv_attr +
1120                                                parser->queue[i].offset);
1121                                 memcpy(dst, &specs, size);
1122                                 ++parser->queue[i].ibv_attr->num_of_specs;
1123                         }
1124                         parser->queue[i].offset += size;
1125                 }
1126         }
1127 }
1128
1129 /**
1130  * Update flows according to pattern and RSS hash fields.
1131  *
1132  * @param[in, out] parser
1133  *   Internal parser structure.
1134  *
1135  * @return
1136  *   0 on success, a negative errno value otherwise and rte_errno is set.
1137  */
1138 static int
1139 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1140 {
1141         unsigned int i;
1142         enum hash_rxq_type start;
1143         enum hash_rxq_type layer;
1144         int outer = parser->tunnel && parser->rss_conf.level < 2;
1145         uint64_t rss = parser->rss_conf.types;
1146
1147         /* Default to outer RSS. */
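        /*
         * Two strategies: for outer RSS on a tunnelled rule only the hash
         * type matching the outer-most layer is kept, while for inner or
         * plain RSS the rule is expanded into one Verbs flow per hash type
         * between "start" and "layer" that the requested RSS types cover.
         */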
1148         if (!parser->rss_conf.level)
1149                 parser->rss_conf.level = 1;
1150         layer = outer ? parser->out_layer : parser->layer;
1151         if (layer == HASH_RXQ_TUNNEL)
1152                 layer = HASH_RXQ_ETH;
1153         if (outer) {
1154                 /* Only one hash type for outer RSS. */
1155                 if (rss && layer == HASH_RXQ_ETH) {
1156                         start = HASH_RXQ_TCPV4;
1157                 } else if (rss && layer != HASH_RXQ_ETH &&
1158                            !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1159                         /* If RSS does not match the L4 pattern, try L3 RSS. */
1160                         if (layer < HASH_RXQ_IPV4)
1161                                 layer = HASH_RXQ_IPV4;
1162                         else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1163                                 layer = HASH_RXQ_IPV6;
1164                         start = layer;
1165                 } else {
1166                         start = layer;
1167                 }
1168                 /* Scan first valid hash type. */
1169                 for (i = start; rss && i <= layer; ++i) {
1170                         if (!parser->queue[i].ibv_attr)
1171                                 continue;
1172                         if (hash_rxq_init[i].dpdk_rss_hf & rss)
1173                                 break;
1174                 }
1175                 if (rss && i <= layer)
1176                         parser->queue[layer].hash_fields =
1177                                         hash_rxq_init[i].hash_fields;
1178                 /* Trim unused hash types. */
1179                 for (i = 0; i != hash_rxq_init_n; ++i) {
1180                         if (parser->queue[i].ibv_attr && i != layer) {
1181                                 rte_free(parser->queue[i].ibv_attr);
1182                                 parser->queue[i].ibv_attr = NULL;
1183                         }
1184                 }
1185         } else {
1186                 /* Expand for inner or normal RSS. */
1187                 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1188                         start = HASH_RXQ_TCPV4;
1189                 else if (rss && layer == HASH_RXQ_IPV6)
1190                         start = HASH_RXQ_TCPV6;
1191                 else
1192                         start = layer;
1193                 /* For an L4 pattern, fall back to L3 RSS if no L4 RSS type is set. */
1194                 /* Trim unused hash types. */
1195                 for (i = 0; i != hash_rxq_init_n; ++i) {
1196                         if (!parser->queue[i].ibv_attr)
1197                                 continue;
1198                         if (i < start || i > layer) {
1199                                 rte_free(parser->queue[i].ibv_attr);
1200                                 parser->queue[i].ibv_attr = NULL;
1201                                 continue;
1202                         }
1203                         if (!rss)
1204                                 continue;
1205                         if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1206                                 parser->queue[i].hash_fields =
1207                                                 hash_rxq_init[i].hash_fields;
1208                         } else if (i != layer) {
1209                                 /* Remove unused RSS expansion. */
1210                                 rte_free(parser->queue[i].ibv_attr);
1211                                 parser->queue[i].ibv_attr = NULL;
1212                         } else if (layer < HASH_RXQ_IPV4 &&
1213                                    (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1214                                     rss)) {
1215                                 /* Allow IPv4 RSS on L4 pattern. */
1216                                 parser->queue[i].hash_fields =
1217                                         hash_rxq_init[HASH_RXQ_IPV4]
1218                                                 .hash_fields;
1219                         } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1220                                    (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1221                                     rss)) {
1222                                 /* Allow IPv6 RSS on L4 pattern. */
1223                                 parser->queue[i].hash_fields =
1224                                         hash_rxq_init[HASH_RXQ_IPV6]
1225                                                 .hash_fields;
1226                         }
1227                 }
1228         }
1229         return 0;
1230 }
1231
1232 /**
1233  * Validate and convert a flow supported by the NIC.
1234  *
1235  * @param dev
1236  *   Pointer to Ethernet device.
1237  * @param[in] attr
1238  *   Flow rule attributes.
1239  * @param[in] pattern
1240  *   Pattern specification (list terminated by the END pattern item).
1241  * @param[in] actions
1242  *   Associated actions (list terminated by the END action).
1243  * @param[out] error
1244  *   Perform verbose error reporting if not NULL.
1245  * @param[in, out] parser
1246  *   Internal parser structure.
1247  *
1248  * @return
1249  *   0 on success, a negative errno value otherwise and rte_errno is set.
1250  */
1251 static int
1252 mlx5_flow_convert(struct rte_eth_dev *dev,
1253                   const struct rte_flow_attr *attr,
1254                   const struct rte_flow_item items[],
1255                   const struct rte_flow_action actions[],
1256                   struct rte_flow_error *error,
1257                   struct mlx5_flow_parse *parser)
1258 {
1259         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1260         unsigned int i;
1261         int ret;
1262
1263         /* First step. Validate the attributes, items and actions. */
1264         *parser = (struct mlx5_flow_parse){
1265                 .create = parser->create,
1266                 .layer = HASH_RXQ_ETH,
1267                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1268         };
1269         ret = mlx5_flow_convert_attributes(attr, error);
1270         if (ret)
1271                 return ret;
1272         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1273         if (ret)
1274                 return ret;
1275         ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1276         if (ret)
1277                 return ret;
1278         mlx5_flow_convert_finalise(parser);
1279         /*
1280          * Second step.
1281          * Allocate the memory space to store verbs specifications.
1282          */
1283         if (parser->drop) {
1284                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1285
1286                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1287                         mlx5_flow_convert_allocate(offset, error);
1288                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1289                         goto exit_enomem;
1290                 parser->queue[HASH_RXQ_ETH].offset =
1291                         sizeof(struct ibv_flow_attr);
1292         } else {
1293                 for (i = 0; i != hash_rxq_init_n; ++i) {
1294                         unsigned int offset;
1295
1296                         offset = parser->queue[i].offset;
1297                         parser->queue[i].ibv_attr =
1298                                 mlx5_flow_convert_allocate(offset, error);
1299                         if (!parser->queue[i].ibv_attr)
1300                                 goto exit_enomem;
1301                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1302                 }
1303         }
1304         /* Third step. Conversion parse, fill the specifications. */
1305         parser->inner = 0;
1306         parser->tunnel = 0;
1307         parser->layer = HASH_RXQ_ETH;
1308         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1309                 struct mlx5_flow_data data = {
1310                         .dev = dev,
1311                         .parser = parser,
1312                         .error = error,
1313                 };
1314
1315                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1316                         continue;
1317                 cur_item = &mlx5_flow_items[items->type];
1318                 ret = cur_item->convert(items,
1319                                         (cur_item->default_mask ?
1320                                          cur_item->default_mask :
1321                                          cur_item->mask),
1322                                          &data);
1323                 if (ret)
1324                         goto exit_free;
1325         }
1326         if (!parser->drop) {
1327                 /* RSS check, remove unused hash types. */
1328                 ret = mlx5_flow_convert_rss(parser);
1329                 if (ret)
1330                         goto exit_free;
1331                 /* Complete missing specification. */
1332                 mlx5_flow_convert_finalise(parser);
1333         }
1334         mlx5_flow_update_priority(dev, parser, attr);
1335         if (parser->mark)
1336                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1337         if (parser->count && parser->create) {
1338                 mlx5_flow_create_count(dev, parser);
1339                 if (!parser->cs)
1340                         goto exit_count_error;
1341         }
1342 exit_free:
1343         /* For validation only, all resources must be released. */
1344         if (!parser->create) {
1345                 for (i = 0; i != hash_rxq_init_n; ++i) {
1346                         if (parser->queue[i].ibv_attr) {
1347                                 rte_free(parser->queue[i].ibv_attr);
1348                                 parser->queue[i].ibv_attr = NULL;
1349                         }
1350                 }
1351         }
1352         return ret;
1353 exit_enomem:
1354         for (i = 0; i != hash_rxq_init_n; ++i) {
1355                 if (parser->queue[i].ibv_attr) {
1356                         rte_free(parser->queue[i].ibv_attr);
1357                         parser->queue[i].ibv_attr = NULL;
1358                 }
1359         }
1360         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1361                            NULL, "cannot allocate verbs spec attributes");
1362         return -rte_errno;
1363 exit_count_error:
1364         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1365                            NULL, "cannot create counter");
1366         return -rte_errno;
1367 }
1368
1369 /**
1370  * Copy the specification created into the flow.
1371  *
1372  * @param parser
1373  *   Internal parser structure.
1374  * @param src
1375  *   Create specification.
1376  * @param size
1377  *   Size in bytes of the specification to copy.
1378  */
1379 static void
1380 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1381                       unsigned int size)
1382 {
1383         unsigned int i;
1384         void *dst;
1385
1386         for (i = 0; i != hash_rxq_init_n; ++i) {
1387                 if (!parser->queue[i].ibv_attr)
1388                         continue;
1389                 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1390                                 parser->queue[i].offset);
1391                 memcpy(dst, src, size);
1392                 ++parser->queue[i].ibv_attr->num_of_specs;
1393                 parser->queue[i].offset += size;
1394         }
1395 }
1396
1397 /**
1398  * Convert Ethernet item to Verbs specification.
1399  *
1400  * @param[in] item
1401  *   Item specification.
1402  * @param[in] default_mask
1403  *   Default bit-masks to use when item->mask is not provided.
1404  * @param[in, out] data
1405  *   User structure.
1406  *
1407  * @return
1408  *   0 on success, a negative errno value otherwise and rte_errno is set.
1409  */
1410 static int
1411 mlx5_flow_create_eth(const struct rte_flow_item *item,
1412                      const void *default_mask,
1413                      struct mlx5_flow_data *data)
1414 {
1415         const struct rte_flow_item_eth *spec = item->spec;
1416         const struct rte_flow_item_eth *mask = item->mask;
1417         struct mlx5_flow_parse *parser = data->parser;
1418         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1419         struct ibv_flow_spec_eth eth = {
1420                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1421                 .size = eth_size,
1422         };
1423
1424         parser->layer = HASH_RXQ_ETH;
1425         if (spec) {
1426                 unsigned int i;
1427
1428                 if (!mask)
1429                         mask = default_mask;
1430                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1431                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1432                 eth.val.ether_type = spec->type;
1433                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1434                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1435                 eth.mask.ether_type = mask->type;
1436                 /* Remove unwanted bits from values. */
1437                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1438                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1439                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1440                 }
1441                 eth.val.ether_type &= eth.mask.ether_type;
1442         }
1443         mlx5_flow_create_copy(parser, &eth, eth_size);
1444         return 0;
1445 }
1446
1447 /**
1448  * Convert VLAN item to Verbs specification.
1449  *
1450  * @param[in] item
1451  *   Item specification.
1452  * @param[in] default_mask
1453  *   Default bit-masks to use when item->mask is not provided.
1454  * @param[in, out] data
1455  *   User structure.
1456  *
1457  * @return
1458  *   0 on success, a negative errno value otherwise and rte_errno is set.
1459  */
1460 static int
1461 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1462                       const void *default_mask,
1463                       struct mlx5_flow_data *data)
1464 {
1465         const struct rte_flow_item_vlan *spec = item->spec;
1466         const struct rte_flow_item_vlan *mask = item->mask;
1467         struct mlx5_flow_parse *parser = data->parser;
1468         struct ibv_flow_spec_eth *eth;
1469         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1470         const char *msg = "VLAN cannot be empty";
1471
1472         if (spec) {
1473                 unsigned int i;
1474                 if (!mask)
1475                         mask = default_mask;
1476
1477                 for (i = 0; i != hash_rxq_init_n; ++i) {
1478                         if (!parser->queue[i].ibv_attr)
1479                                 continue;
1480
1481                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1482                                        parser->queue[i].offset - eth_size);
1483                         eth->val.vlan_tag = spec->tci;
1484                         eth->mask.vlan_tag = mask->tci;
1485                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1486                         /*
1487                          * From verbs perspective an empty VLAN is equivalent
1488                          * to a packet without VLAN layer.
1489                          */
1490                         if (!eth->mask.vlan_tag)
1491                                 goto error;
1492                         /* Outer TPID cannot be matched. */
1493                         if (eth->mask.ether_type) {
1494                                 msg = "VLAN TPID matching is not supported";
1495                                 goto error;
1496                         }
1497                         eth->val.ether_type = spec->inner_type;
1498                         eth->mask.ether_type = mask->inner_type;
1499                         eth->val.ether_type &= eth->mask.ether_type;
1500                 }
1501                 return 0;
1502         }
1503 error:
1504         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1505                                   item, msg);
1506 }
1507
1508 /**
1509  * Convert IPv4 item to Verbs specification.
1510  *
1511  * @param[in] item
1512  *   Item specification.
1513  * @param[in] default_mask
1514  *   Default bit-masks to use when item->mask is not provided.
1515  * @param[in, out] data
1516  *   User structure.
1517  *
1518  * @return
1519  *   0 on success, a negative errno value otherwise and rte_errno is set.
1520  */
1521 static int
1522 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1523                       const void *default_mask,
1524                       struct mlx5_flow_data *data)
1525 {
1526         struct priv *priv = data->dev->data->dev_private;
1527         const struct rte_flow_item_ipv4 *spec = item->spec;
1528         const struct rte_flow_item_ipv4 *mask = item->mask;
1529         struct mlx5_flow_parse *parser = data->parser;
1530         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1531         struct ibv_flow_spec_ipv4_ext ipv4 = {
1532                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1533                 .size = ipv4_size,
1534         };
1535
1536         if (parser->layer == HASH_RXQ_TUNNEL &&
1537             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1538             !priv->config.l3_vxlan_en)
1539                 return rte_flow_error_set(data->error, EINVAL,
1540                                           RTE_FLOW_ERROR_TYPE_ITEM,
1541                                           item,
1542                                           "L3 VXLAN not enabled by device"
1543                                           " parameter and/or not configured"
1544                                           " in firmware");
1545         parser->layer = HASH_RXQ_IPV4;
1546         if (spec) {
1547                 if (!mask)
1548                         mask = default_mask;
1549                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1550                         .src_ip = spec->hdr.src_addr,
1551                         .dst_ip = spec->hdr.dst_addr,
1552                         .proto = spec->hdr.next_proto_id,
1553                         .tos = spec->hdr.type_of_service,
1554                 };
1555                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1556                         .src_ip = mask->hdr.src_addr,
1557                         .dst_ip = mask->hdr.dst_addr,
1558                         .proto = mask->hdr.next_proto_id,
1559                         .tos = mask->hdr.type_of_service,
1560                 };
1561                 /* Remove unwanted bits from values. */
1562                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1563                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1564                 ipv4.val.proto &= ipv4.mask.proto;
1565                 ipv4.val.tos &= ipv4.mask.tos;
1566         }
1567         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1568         return 0;
1569 }
1570
1571 /**
1572  * Convert IPv6 item to Verbs specification.
1573  *
1574  * @param[in] item
1575  *   Item specification.
1576  * @param[in] default_mask
1577  *   Default bit-masks to use when item->mask is not provided.
1578  * @param[in, out] data
1579  *   User structure.
1580  *
1581  * @return
1582  *   0 on success, a negative errno value otherwise and rte_errno is set.
1583  */
1584 static int
1585 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1586                       const void *default_mask,
1587                       struct mlx5_flow_data *data)
1588 {
1589         struct priv *priv = data->dev->data->dev_private;
1590         const struct rte_flow_item_ipv6 *spec = item->spec;
1591         const struct rte_flow_item_ipv6 *mask = item->mask;
1592         struct mlx5_flow_parse *parser = data->parser;
1593         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1594         struct ibv_flow_spec_ipv6 ipv6 = {
1595                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1596                 .size = ipv6_size,
1597         };
1598
1599         if (parser->layer == HASH_RXQ_TUNNEL &&
1600             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1601             !priv->config.l3_vxlan_en)
1602                 return rte_flow_error_set(data->error, EINVAL,
1603                                           RTE_FLOW_ERROR_TYPE_ITEM,
1604                                           item,
1605                                           "L3 VXLAN not enabled by device"
1606                                           " parameter and/or not configured"
1607                                           " in firmware");
1608         parser->layer = HASH_RXQ_IPV6;
1609         if (spec) {
1610                 unsigned int i;
1611                 uint32_t vtc_flow_val;
1612                 uint32_t vtc_flow_mask;
1613
1614                 if (!mask)
1615                         mask = default_mask;
1616                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1617                        RTE_DIM(ipv6.val.src_ip));
1618                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1619                        RTE_DIM(ipv6.val.dst_ip));
1620                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1621                        RTE_DIM(ipv6.mask.src_ip));
1622                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1623                        RTE_DIM(ipv6.mask.dst_ip));
1624                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1625                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1626                 ipv6.val.flow_label =
1627                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1628                                          IPV6_HDR_FL_SHIFT);
1629                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1630                                          IPV6_HDR_TC_SHIFT;
1631                 ipv6.val.next_hdr = spec->hdr.proto;
1632                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1633                 ipv6.mask.flow_label =
1634                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1635                                          IPV6_HDR_FL_SHIFT);
1636                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1637                                           IPV6_HDR_TC_SHIFT;
1638                 ipv6.mask.next_hdr = mask->hdr.proto;
1639                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1640                 /* Remove unwanted bits from values. */
1641                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1642                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1643                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1644                 }
1645                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1646                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1647                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1648                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1649         }
1650         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1651         return 0;
1652 }
1653
1654 /**
1655  * Convert UDP item to Verbs specification.
1656  *
1657  * @param[in] item
1658  *   Item specification.
1659  * @param[in] default_mask
1660  *   Default bit-masks to use when item->mask is not provided.
1661  * @param[in, out] data
1662  *   User structure.
1663  *
1664  * @return
1665  *   0 on success, a negative errno value otherwise and rte_errno is set.
1666  */
1667 static int
1668 mlx5_flow_create_udp(const struct rte_flow_item *item,
1669                      const void *default_mask,
1670                      struct mlx5_flow_data *data)
1671 {
1672         const struct rte_flow_item_udp *spec = item->spec;
1673         const struct rte_flow_item_udp *mask = item->mask;
1674         struct mlx5_flow_parse *parser = data->parser;
1675         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1676         struct ibv_flow_spec_tcp_udp udp = {
1677                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1678                 .size = udp_size,
1679         };
1680
1681         if (parser->layer == HASH_RXQ_IPV4)
1682                 parser->layer = HASH_RXQ_UDPV4;
1683         else
1684                 parser->layer = HASH_RXQ_UDPV6;
1685         if (spec) {
1686                 if (!mask)
1687                         mask = default_mask;
1688                 udp.val.dst_port = spec->hdr.dst_port;
1689                 udp.val.src_port = spec->hdr.src_port;
1690                 udp.mask.dst_port = mask->hdr.dst_port;
1691                 udp.mask.src_port = mask->hdr.src_port;
1692                 /* Remove unwanted bits from values. */
1693                 udp.val.src_port &= udp.mask.src_port;
1694                 udp.val.dst_port &= udp.mask.dst_port;
1695         }
1696         mlx5_flow_create_copy(parser, &udp, udp_size);
1697         return 0;
1698 }
1699
1700 /**
1701  * Convert TCP item to Verbs specification.
1702  *
1703  * @param[in] item
1704  *   Item specification.
1705  * @param[in] default_mask
1706  *   Default bit-masks to use when item->mask is not provided.
1707  * @param[in, out] data
1708  *   User structure.
1709  *
1710  * @return
1711  *   0 on success, a negative errno value otherwise and rte_errno is set.
1712  */
1713 static int
1714 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1715                      const void *default_mask,
1716                      struct mlx5_flow_data *data)
1717 {
1718         const struct rte_flow_item_tcp *spec = item->spec;
1719         const struct rte_flow_item_tcp *mask = item->mask;
1720         struct mlx5_flow_parse *parser = data->parser;
1721         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1722         struct ibv_flow_spec_tcp_udp tcp = {
1723                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1724                 .size = tcp_size,
1725         };
1726
1727         if (parser->layer == HASH_RXQ_IPV4)
1728                 parser->layer = HASH_RXQ_TCPV4;
1729         else
1730                 parser->layer = HASH_RXQ_TCPV6;
1731         if (spec) {
1732                 if (!mask)
1733                         mask = default_mask;
1734                 tcp.val.dst_port = spec->hdr.dst_port;
1735                 tcp.val.src_port = spec->hdr.src_port;
1736                 tcp.mask.dst_port = mask->hdr.dst_port;
1737                 tcp.mask.src_port = mask->hdr.src_port;
1738                 /* Remove unwanted bits from values. */
1739                 tcp.val.src_port &= tcp.mask.src_port;
1740                 tcp.val.dst_port &= tcp.mask.dst_port;
1741         }
1742         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1743         return 0;
1744 }
1745
1746 /**
1747  * Convert VXLAN item to Verbs specification.
1748  *
1749  * @param[in] item
1750  *   Item specification.
1751  * @param[in] default_mask
1752  *   Default bit-masks to use when item->mask is not provided.
1753  * @param[in, out] data
1754  *   User structure.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 static int
1760 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1761                        const void *default_mask,
1762                        struct mlx5_flow_data *data)
1763 {
1764         const struct rte_flow_item_vxlan *spec = item->spec;
1765         const struct rte_flow_item_vxlan *mask = item->mask;
1766         struct mlx5_flow_parse *parser = data->parser;
1767         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1768         struct ibv_flow_spec_tunnel vxlan = {
1769                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1770                 .size = size,
1771         };
1772         union vni {
1773                 uint32_t vlan_id;
1774                 uint8_t vni[4];
1775         } id;
1776
1777         id.vni[0] = 0;
1778         parser->inner = IBV_FLOW_SPEC_INNER;
1779         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1780         parser->out_layer = parser->layer;
1781         parser->layer = HASH_RXQ_TUNNEL;
1782         /* Default VXLAN to outer RSS. */
1783         if (!parser->rss_conf.level)
1784                 parser->rss_conf.level = 1;
1785         if (spec) {
1786                 if (!mask)
1787                         mask = default_mask;
1788                 memcpy(&id.vni[1], spec->vni, 3);
1789                 vxlan.val.tunnel_id = id.vlan_id;
1790                 memcpy(&id.vni[1], mask->vni, 3);
1791                 vxlan.mask.tunnel_id = id.vlan_id;
1792                 /* Remove unwanted bits from values. */
1793                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1794         }
1795         /*
1796          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1797          * layer is defined in the Verbs specification, it is interpreted as a
1798          * wildcard and all packets will match this rule; if it follows a full
1799          * stack (e.g. eth / ipv4 / udp), all packets matching the preceding
1800          * layers will also match this rule.
1801          * To avoid such a situation, VNI 0 is currently refused.
1802          */
1803         if (!vxlan.val.tunnel_id)
1804                 return rte_flow_error_set(data->error, EINVAL,
1805                                           RTE_FLOW_ERROR_TYPE_ITEM,
1806                                           item,
1807                                           "VxLAN vni cannot be 0");
1808         mlx5_flow_create_copy(parser, &vxlan, size);
1809         return 0;
1810 }
1811
1812 /**
1813  * Convert VXLAN-GPE item to Verbs specification.
1814  *
1815  * @param[in] item
1816  *   Item specification.
1817  * @param[in] default_mask
1818  *   Default bit-masks to use when item->mask is not provided.
1819  * @param[in, out] data
1820  *   User structure.
1821  *
1822  * @return
1823  *   0 on success, a negative errno value otherwise and rte_errno is set.
1824  */
1825 static int
1826 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1827                            const void *default_mask,
1828                            struct mlx5_flow_data *data)
1829 {
1830         struct priv *priv = data->dev->data->dev_private;
1831         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1832         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1833         struct mlx5_flow_parse *parser = data->parser;
1834         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1835         struct ibv_flow_spec_tunnel vxlan = {
1836                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1837                 .size = size,
1838         };
1839         union vni {
1840                 uint32_t vlan_id;
1841                 uint8_t vni[4];
1842         } id;
1843
1844         if (!priv->config.l3_vxlan_en)
1845                 return rte_flow_error_set(data->error, EINVAL,
1846                                           RTE_FLOW_ERROR_TYPE_ITEM,
1847                                           item,
1848                                           "L3 VXLAN not enabled by device"
1849                                           " parameter and/or not configured"
1850                                           " in firmware");
1851         id.vni[0] = 0;
1852         parser->inner = IBV_FLOW_SPEC_INNER;
1853         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1854         parser->out_layer = parser->layer;
1855         parser->layer = HASH_RXQ_TUNNEL;
1856         /* Default VXLAN-GPE to outer RSS. */
1857         if (!parser->rss_conf.level)
1858                 parser->rss_conf.level = 1;
1859         if (spec) {
1860                 if (!mask)
1861                         mask = default_mask;
1862                 memcpy(&id.vni[1], spec->vni, 3);
1863                 vxlan.val.tunnel_id = id.vlan_id;
1864                 memcpy(&id.vni[1], mask->vni, 3);
1865                 vxlan.mask.tunnel_id = id.vlan_id;
1866                 if (spec->protocol)
1867                         return rte_flow_error_set(data->error, EINVAL,
1868                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1869                                                   item,
1870                                                   "VxLAN-GPE protocol not"
1871                                                   " supported");
1872                 /* Remove unwanted bits from values. */
1873                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1874         }
1875         /*
1876          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1877          * layer is defined in the Verbs specification, it is interpreted as a
1878          * wildcard and all packets will match this rule; if it follows a full
1879          * stack (e.g. eth / ipv4 / udp), all packets matching the preceding
1880          * layers will also match this rule.
1881          * To avoid this, VNI 0 requires a fully specified outer stack.
1882          */
1883         /* Only allow a tunnel without a VNI after a proper outer spec. */
1884         if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1885                 return rte_flow_error_set(data->error, EINVAL,
1886                                           RTE_FLOW_ERROR_TYPE_ITEM,
1887                                           item,
1888                                           "VxLAN-GPE vni cannot be 0");
1889         mlx5_flow_create_copy(parser, &vxlan, size);
1890         return 0;
1891 }
1892
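/*
 * Illustration only: a minimal pattern accepted by the converter above.
 * The VNI value is arbitrary and the protocol field must stay zero since
 * matching on it is rejected.
 *
 *	struct rte_flow_item_vxlan_gpe gpe = { .vni = { 0, 0, 42 } };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, .spec = &gpe },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */
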
1893 /**
1894  * Convert GRE item to Verbs specification.
1895  *
1896  * @param[in] item
1897  *   Item specification.
1898  * @param[in] default_mask
1899  *   Default bit-masks to use when item->mask is not provided.
1900  * @param[in, out] data
1901  *   User structure.
1902  *
1903  * @return
1904  *   0 on success, a negative errno value otherwise and rte_errno is set.
1905  */
1906 static int
1907 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1908                      const void *default_mask __rte_unused,
1909                      struct mlx5_flow_data *data)
1910 {
1911         struct mlx5_flow_parse *parser = data->parser;
1912         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1913         struct ibv_flow_spec_tunnel tunnel = {
1914                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1915                 .size = size,
1916         };
1917         struct ibv_flow_spec_ipv4_ext *ipv4;
1918         struct ibv_flow_spec_ipv6 *ipv6;
1919         unsigned int i;
1920
1921         parser->inner = IBV_FLOW_SPEC_INNER;
1922         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1923         parser->out_layer = parser->layer;
1924         parser->layer = HASH_RXQ_TUNNEL;
1925         /* Default GRE to inner RSS. */
1926         if (!parser->rss_conf.level)
1927                 parser->rss_conf.level = 2;
1928         /* Update encapsulation IP layer protocol. */
1929         /* Update the outer/encapsulating IP protocol to GRE. */
1930                 if (!parser->queue[i].ibv_attr)
1931                         continue;
1932                 if (parser->out_layer == HASH_RXQ_IPV4) {
1933                         ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1934                                 parser->queue[i].offset -
1935                                 sizeof(struct ibv_flow_spec_ipv4_ext));
1936                         if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1937                                 break;
1938                         ipv4->val.proto = MLX5_GRE;
1939                         ipv4->mask.proto = 0xff;
1940                 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1941                         ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1942                                 parser->queue[i].offset -
1943                                 sizeof(struct ibv_flow_spec_ipv6));
1944                         if (ipv6->mask.next_hdr &&
1945                             ipv6->val.next_hdr != MLX5_GRE)
1946                                 break;
1947                         ipv6->val.next_hdr = MLX5_GRE;
1948                         ipv6->mask.next_hdr = 0xff;
1949                 }
1950         }
1951         if (i != hash_rxq_init_n)
1952                 return rte_flow_error_set(data->error, EINVAL,
1953                                           RTE_FLOW_ERROR_TYPE_ITEM,
1954                                           item,
1955                                           "IP protocol of GRE must be 47");
1956         mlx5_flow_create_copy(parser, &tunnel, size);
1957         return 0;
1958 }
1959
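/*
 * Note: GRE matching above reuses the generic tunnel specification
 * (IBV_FLOW_SPEC_VXLAN_TUNNEL) and identifies GRE by forcing the outer IP
 * protocol (or IPv6 next header) to 47 (MLX5_GRE); an outer item already
 * matching a different protocol value is rejected.
 */
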
1960 /**
1961  * Convert mark/flag action to Verbs specification.
1962  *
1963  * @param parser
1964  *   Internal parser structure.
1965  * @param mark_id
1966  *   Mark identifier.
1967  *
1968  * @return
1969  *   0 on success, a negative errno value otherwise and rte_errno is set.
1970  */
1971 static int
1972 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1973 {
1974         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1975         struct ibv_flow_spec_action_tag tag = {
1976                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1977                 .size = size,
1978                 .tag_id = mlx5_flow_mark_set(mark_id),
1979         };
1980
1981         assert(parser->mark);
1982         mlx5_flow_create_copy(parser, &tag, size);
1983         return 0;
1984 }
1985
1986 /**
1987  * Convert count action to Verbs specification.
1988  *
1989  * @param dev
1990  *   Pointer to Ethernet device.
1991  * @param parser
1992  *   Pointer to MLX5 flow parser structure.
1993  *
1994  * @return
1995  *   0 on success, a negative errno value otherwise and rte_errno is set.
1996  */
1997 static int
1998 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1999                        struct mlx5_flow_parse *parser __rte_unused)
2000 {
2001 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2002         struct priv *priv = dev->data->dev_private;
2003         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2004         struct ibv_counter_set_init_attr init_attr = {0};
2005         struct ibv_flow_spec_counter_action counter = {
2006                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2007                 .size = size,
2008                 .counter_set_handle = 0,
2009         };
2010
2011         init_attr.counter_set_id = 0;
2012         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2013         if (!parser->cs) {
2014                 rte_errno = EINVAL;
2015                 return -rte_errno;
2016         }
2017         counter.counter_set_handle = parser->cs->handle;
2018         mlx5_flow_create_copy(parser, &counter, size);
2019 #endif
2020         return 0;
2021 }
2022
2023 /**
2024  * Complete flow rule creation with a drop queue.
2025  *
2026  * @param dev
2027  *   Pointer to Ethernet device.
2028  * @param parser
2029  *   Internal parser structure.
2030  * @param flow
2031  *   Pointer to the rte_flow.
2032  * @param[out] error
2033  *   Perform verbose error reporting if not NULL.
2034  *
2035  * @return
2036  *   0 on success, a negative errno value otherwise and rte_errno is set.
2037  */
2038 static int
2039 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2040                                    struct mlx5_flow_parse *parser,
2041                                    struct rte_flow *flow,
2042                                    struct rte_flow_error *error)
2043 {
2044         struct priv *priv = dev->data->dev_private;
2045         struct ibv_flow_spec_action_drop *drop;
2046         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2047
2048         assert(priv->pd);
2049         assert(priv->ctx);
2050         flow->drop = 1;
2051         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2052                         parser->queue[HASH_RXQ_ETH].offset);
2053         *drop = (struct ibv_flow_spec_action_drop){
2054                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2055                         .size = size,
2056         };
2057         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2058         parser->queue[HASH_RXQ_ETH].offset += size;
2059         flow->frxq[HASH_RXQ_ETH].ibv_attr =
2060                 parser->queue[HASH_RXQ_ETH].ibv_attr;
2061         if (parser->count)
2062                 flow->cs = parser->cs;
2063         if (!priv->dev->data->dev_started)
2064                 return 0;
2065         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2066         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2067                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2068                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
2069         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2070                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2071                                    NULL, "flow rule creation failure");
2072                 goto error;
2073         }
2074         return 0;
2075 error:
2076         assert(flow);
2077         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2078                 claim_zero(mlx5_glue->destroy_flow
2079                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2080                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2081         }
2082         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2083                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2084                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2085         }
2086         if (flow->cs) {
2087                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2088                 flow->cs = NULL;
2089                 parser->cs = NULL;
2090         }
2091         return -rte_errno;
2092 }
2093
2094 /**
2095  * Create hash Rx queues when RSS is enabled.
2096  *
2097  * @param dev
2098  *   Pointer to Ethernet device.
2099  * @param parser
2100  *   Internal parser structure.
2101  * @param flow
2102  *   Pointer to the rte_flow.
2103  * @param[out] error
2104  *   Perform verbose error reporting if not NULL.
2105  *
2106  * @return
2107  *   0 on success, a negative errno value otherwise and rte_errno is set.
2108  */
2109 static int
2110 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2111                                   struct mlx5_flow_parse *parser,
2112                                   struct rte_flow *flow,
2113                                   struct rte_flow_error *error)
2114 {
2115         struct priv *priv = dev->data->dev_private;
2116         unsigned int i;
2117
2118         for (i = 0; i != hash_rxq_init_n; ++i) {
2119                 if (!parser->queue[i].ibv_attr)
2120                         continue;
2121                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2122                 parser->queue[i].ibv_attr = NULL;
2123                 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2124                 if (!priv->dev->data->dev_started)
2125                         continue;
2126                 flow->frxq[i].hrxq =
2127                         mlx5_hrxq_get(dev,
2128                                       parser->rss_conf.key,
2129                                       parser->rss_conf.key_len,
2130                                       flow->frxq[i].hash_fields,
2131                                       parser->rss_conf.queue,
2132                                       parser->rss_conf.queue_num,
2133                                       parser->tunnel,
2134                                       parser->rss_conf.level);
2135                 if (flow->frxq[i].hrxq)
2136                         continue;
2137                 flow->frxq[i].hrxq =
2138                         mlx5_hrxq_new(dev,
2139                                       parser->rss_conf.key,
2140                                       parser->rss_conf.key_len,
2141                                       flow->frxq[i].hash_fields,
2142                                       parser->rss_conf.queue,
2143                                       parser->rss_conf.queue_num,
2144                                       parser->tunnel,
2145                                       parser->rss_conf.level);
2146                 if (!flow->frxq[i].hrxq) {
2147                         return rte_flow_error_set(error, ENOMEM,
2148                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
2149                                                   NULL,
2150                                                   "cannot create hash rxq");
2151                 }
2152         }
2153         return 0;
2154 }
2155
2156 /**
2157  * RXQ update after flow rule creation.
2158  *
2159  * @param dev
2160  *   Pointer to Ethernet device.
2161  * @param flow
2162  *   Pointer to the flow rule.
2163  */
2164 static void
2165 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2166 {
2167         struct priv *priv = dev->data->dev_private;
2168         unsigned int i;
2169         unsigned int j;
2170
2171         if (!dev->data->dev_started)
2172                 return;
2173         for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2174                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2175                                                  [(*flow->queues)[i]];
2176                 struct mlx5_rxq_ctrl *rxq_ctrl =
2177                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2178                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2179
2180                 rxq_data->mark |= flow->mark;
2181                 if (!tunnel)
2182                         continue;
2183                 rxq_ctrl->tunnel_types[tunnel] += 1;
2184                 /* Clear tunnel type if more than one tunnel type is set. */
2185                 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2186                         if (j == tunnel)
2187                                 continue;
2188                         if (rxq_ctrl->tunnel_types[j] > 0) {
2189                                 rxq_data->tunnel = 0;
2190                                 break;
2191                         }
2192                 }
2193                 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2194                         rxq_data->tunnel = flow->tunnel;
2195         }
2196 }
2197
2198 /**
2199  * Dump flow hash RX queue detail.
2200  *
2201  * @param dev
2202  *   Pointer to Ethernet device.
2203  * @param flow
2204  *   Pointer to the rte_flow.
2205  * @param hrxq_idx
2206  *   Hash RX queue index.
2207  */
2208 static void
2209 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2210                struct rte_flow *flow __rte_unused,
2211                unsigned int hrxq_idx __rte_unused)
2212 {
2213 #ifndef NDEBUG
2214         uintptr_t spec_ptr;
2215         uint16_t j;
2216         char buf[256];
2217         uint8_t off;
2218
2219         spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2220         for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2221              j++) {
2222                 struct ibv_flow_spec *spec = (void *)spec_ptr;
2223                 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2224                                spec->hdr.size);
2225                 spec_ptr += spec->hdr.size;
2226         }
2227         DRV_LOG(DEBUG,
2228                 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2229                 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2230                 " flags:%x, comp_mask:%x specs:%s",
2231                 dev->data->port_id, (void *)flow, hrxq_idx,
2232                 (void *)flow->frxq[hrxq_idx].hrxq,
2233                 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2234                 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2235                 flow->frxq[hrxq_idx].hash_fields |
2236                 (flow->tunnel &&
2237                  flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2238                 flow->rss_conf.queue_num,
2239                 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2240                 flow->frxq[hrxq_idx].ibv_attr->size,
2241                 flow->frxq[hrxq_idx].ibv_attr->priority,
2242                 flow->frxq[hrxq_idx].ibv_attr->type,
2243                 flow->frxq[hrxq_idx].ibv_attr->flags,
2244                 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2245                 buf);
2246 #endif
2247 }
2248
2249 /**
2250  * Complete flow rule creation.
2251  *
2252  * @param dev
2253  *   Pointer to Ethernet device.
2254  * @param parser
2255  *   Internal parser structure.
2256  * @param flow
2257  *   Pointer to the rte_flow.
2258  * @param[out] error
2259  *   Perform verbose error reporting if not NULL.
2260  *
2261  * @return
2262  *   0 on success, a negative errno value otherwise and rte_errno is set.
2263  */
2264 static int
2265 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2266                               struct mlx5_flow_parse *parser,
2267                               struct rte_flow *flow,
2268                               struct rte_flow_error *error)
2269 {
2270         struct priv *priv = dev->data->dev_private;
2271         int ret;
2272         unsigned int i;
2273         unsigned int flows_n = 0;
2274
2275         assert(priv->pd);
2276         assert(priv->ctx);
2277         assert(!parser->drop);
2278         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2279         if (ret)
2280                 goto error;
2281         if (parser->count)
2282                 flow->cs = parser->cs;
2283         if (!priv->dev->data->dev_started)
2284                 return 0;
2285         for (i = 0; i != hash_rxq_init_n; ++i) {
2286                 if (!flow->frxq[i].hrxq)
2287                         continue;
2288                 flow->frxq[i].ibv_flow =
2289                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2290                                                flow->frxq[i].ibv_attr);
2291                 mlx5_flow_dump(dev, flow, i);
2292                 if (!flow->frxq[i].ibv_flow) {
2293                         rte_flow_error_set(error, ENOMEM,
2294                                            RTE_FLOW_ERROR_TYPE_HANDLE,
2295                                            NULL, "flow rule creation failure");
2296                         goto error;
2297                 }
2298                 ++flows_n;
2299         }
2300         if (!flows_n) {
2301                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2302                                    NULL, "internal error in flow creation");
2303                 goto error;
2304         }
2305         mlx5_flow_create_update_rxqs(dev, flow);
2306         return 0;
2307 error:
2308         ret = rte_errno; /* Save rte_errno before cleanup. */
2309         assert(flow);
2310         for (i = 0; i != hash_rxq_init_n; ++i) {
2311                 if (flow->frxq[i].ibv_flow) {
2312                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2313
2314                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2315                 }
2316                 if (flow->frxq[i].hrxq)
2317                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2318                 if (flow->frxq[i].ibv_attr)
2319                         rte_free(flow->frxq[i].ibv_attr);
2320         }
2321         if (flow->cs) {
2322                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2323                 flow->cs = NULL;
2324                 parser->cs = NULL;
2325         }
2326         rte_errno = ret; /* Restore rte_errno. */
2327         return -rte_errno;
2328 }
2329
2330 /**
2331  * Convert a flow.
2332  *
2333  * @param dev
2334  *   Pointer to Ethernet device.
2335  * @param list
2336  *   Pointer to a TAILQ flow list.
2337  * @param[in] attr
2338  *   Flow rule attributes.
2339  * @param[in] pattern
2340  *   Pattern specification (list terminated by the END pattern item).
2341  * @param[in] actions
2342  *   Associated actions (list terminated by the END action).
2343  * @param[out] error
2344  *   Perform verbose error reporting if not NULL.
2345  *
2346  * @return
2347  *   A flow on success, NULL otherwise and rte_errno is set.
2348  */
2349 static struct rte_flow *
2350 mlx5_flow_list_create(struct rte_eth_dev *dev,
2351                       struct mlx5_flows *list,
2352                       const struct rte_flow_attr *attr,
2353                       const struct rte_flow_item items[],
2354                       const struct rte_flow_action actions[],
2355                       struct rte_flow_error *error)
2356 {
2357         struct mlx5_flow_parse parser = { .create = 1, };
2358         struct rte_flow *flow = NULL;
2359         unsigned int i;
2360         int ret;
2361
2362         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2363         if (ret)
2364                 goto exit;
2365         flow = rte_calloc(__func__, 1,
2366                           sizeof(*flow) +
2367                           parser.rss_conf.queue_num * sizeof(uint16_t),
2368                           0);
2369         if (!flow) {
2370                 rte_flow_error_set(error, ENOMEM,
2371                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2372                                    NULL,
2373                                    "cannot allocate flow memory");
2374                 return NULL;
2375         }
2376         /* Copy configuration. */
2377         flow->queues = (uint16_t (*)[])(flow + 1);
2378         flow->tunnel = parser.tunnel;
2379         flow->rss_conf = (struct rte_flow_action_rss){
2380                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2381                 .level = 0,
2382                 .types = parser.rss_conf.types,
2383                 .key_len = parser.rss_conf.key_len,
2384                 .queue_num = parser.rss_conf.queue_num,
2385                 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2386                               sizeof(*parser.rss_conf.key) *
2387                               parser.rss_conf.key_len),
2388                 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2389                                 sizeof(*parser.rss_conf.queue) *
2390                                 parser.rss_conf.queue_num),
2391         };
2392         flow->mark = parser.mark;
2393         /* Finalise the flow. */
2394         if (parser.drop)
2395                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2396                                                          error);
2397         else
2398                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2399         if (ret)
2400                 goto exit;
2401         TAILQ_INSERT_TAIL(list, flow, next);
2402         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2403                 (void *)flow);
2404         return flow;
2405 exit:
2406         DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2407                 error->message);
2408         for (i = 0; i != hash_rxq_init_n; ++i) {
2409                 if (parser.queue[i].ibv_attr)
2410                         rte_free(parser.queue[i].ibv_attr);
2411         }
2412         rte_free(flow);
2413         return NULL;
2414 }
2415
2416 /**
2417  * Validate a flow supported by the NIC.
2418  *
2419  * @see rte_flow_validate()
2420  * @see rte_flow_ops
2421  */
2422 int
2423 mlx5_flow_validate(struct rte_eth_dev *dev,
2424                    const struct rte_flow_attr *attr,
2425                    const struct rte_flow_item items[],
2426                    const struct rte_flow_action actions[],
2427                    struct rte_flow_error *error)
2428 {
2429         struct mlx5_flow_parse parser = { .create = 0, };
2430
2431         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2432 }
2433
2434 /**
2435  * Create a flow.
2436  *
2437  * @see rte_flow_create()
2438  * @see rte_flow_ops
2439  */
2440 struct rte_flow *
2441 mlx5_flow_create(struct rte_eth_dev *dev,
2442                  const struct rte_flow_attr *attr,
2443                  const struct rte_flow_item items[],
2444                  const struct rte_flow_action actions[],
2445                  struct rte_flow_error *error)
2446 {
2447         struct priv *priv = dev->data->dev_private;
2448
2449         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2450                                      error);
2451 }
2452
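#if 0
/*
 * Usage sketch only, kept out of the build: how an application-level rule
 * reaches the entry points above. The queue indices, VNI and RSS types are
 * arbitrary; level 2 requests hashing on the inner headers of the tunnel.
 */
static struct rte_flow *
example_vxlan_inner_rss(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	static const uint16_t queues[] = { 0, 1 };
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_vxlan vxlan = { .vni = { 0, 0, 42 } };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_rss rss = {
		.level = 2,
		.types = ETH_RSS_IP,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	if (mlx5_flow_validate(dev, &attr, pattern, actions, error))
		return NULL;
	return mlx5_flow_create(dev, &attr, pattern, actions, error);
}
#endif
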
2453 /**
2454  * Destroy a flow in a list.
2455  *
2456  * @param dev
2457  *   Pointer to Ethernet device.
2458  * @param list
2459  *   Pointer to a TAILQ flow list.
2460  * @param[in] flow
2461  *   Flow to destroy.
2462  */
2463 static void
2464 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2465                        struct rte_flow *flow)
2466 {
2467         struct priv *priv = dev->data->dev_private;
2468         unsigned int i;
2469
2470         if (flow->drop || !dev->data->dev_started)
2471                 goto free;
2472         for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2473                 /* Update queue tunnel type. */
2474                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2475                                                  [(*flow->queues)[i]];
2476                 struct mlx5_rxq_ctrl *rxq_ctrl =
2477                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2478                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2479
2480                 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2481                 rxq_ctrl->tunnel_types[tunnel] -= 1;
2482                 if (!rxq_ctrl->tunnel_types[tunnel]) {
2483                         /* Update tunnel type. */
2484                         uint8_t j;
2485                         uint8_t types = 0;
2486                         uint8_t last;
2487
2488                         for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2489                                 if (rxq_ctrl->tunnel_types[j]) {
2490                                         types += 1;
2491                                         last = j;
2492                                 }
2493                         /* Keep as is if more than one tunnel type remains. */
2494                         if (types == 1)
2495                                 rxq_data->tunnel = ptype_ext[last];
2496                         else if (types == 0)
2497                                 /* No tunnel type left. */
2498                                 rxq_data->tunnel = 0;
2499                 }
2500         }
2501         for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2502                 struct rte_flow *tmp;
2503                 int mark = 0;
2504
2505                 /*
2506                  * To remove the mark from the queue, the queue must not be
2507                  * present in any other marked flow (RSS or not).
2508                  */
2509                 TAILQ_FOREACH(tmp, list, next) {
2510                         unsigned int j;
2511                         uint16_t *tqs = NULL;
2512                         uint16_t tq_n = 0;
2513
2514                         if (!tmp->mark)
2515                                 continue;
2516                         for (j = 0; j != hash_rxq_init_n; ++j) {
2517                                 if (!tmp->frxq[j].hrxq)
2518                                         continue;
2519                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2520                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2521                         }
2522                         if (!tq_n)
2523                                 continue;
2524                         for (j = 0; (j != tq_n) && !mark; j++)
2525                                 if (tqs[j] == (*flow->queues)[i])
2526                                         mark = 1;
2527                 }
2528                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2529         }
2530 free:
2531         if (flow->drop) {
2532                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2533                         claim_zero(mlx5_glue->destroy_flow
2534                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2535                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2536         } else {
2537                 for (i = 0; i != hash_rxq_init_n; ++i) {
2538                         struct mlx5_flow *frxq = &flow->frxq[i];
2539
2540                         if (frxq->ibv_flow)
2541                                 claim_zero(mlx5_glue->destroy_flow
2542                                            (frxq->ibv_flow));
2543                         if (frxq->hrxq)
2544                                 mlx5_hrxq_release(dev, frxq->hrxq);
2545                         if (frxq->ibv_attr)
2546                                 rte_free(frxq->ibv_attr);
2547                 }
2548         }
2549         if (flow->cs) {
2550                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2551                 flow->cs = NULL;
2552         }
2553         TAILQ_REMOVE(list, flow, next);
2554         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2555                 (void *)flow);
2556         rte_free(flow);
2557 }
2558
2559 /**
2560  * Destroy all flows.
2561  *
2562  * @param dev
2563  *   Pointer to Ethernet device.
2564  * @param list
2565  *   Pointer to a TAILQ flow list.
2566  */
2567 void
2568 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2569 {
2570         while (!TAILQ_EMPTY(list)) {
2571                 struct rte_flow *flow;
2572
2573                 flow = TAILQ_FIRST(list);
2574                 mlx5_flow_list_destroy(dev, list, flow);
2575         }
2576 }
2577
2578 /**
2579  * Create drop queue.
2580  *
2581  * @param dev
2582  *   Pointer to Ethernet device.
2583  *
2584  * @return
2585  *   0 on success, a negative errno value otherwise and rte_errno is set.
2586  */
2587 int
2588 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2589 {
2590         struct priv *priv = dev->data->dev_private;
2591         struct mlx5_hrxq_drop *fdq = NULL;
2592
2593         assert(priv->pd);
2594         assert(priv->ctx);
2595         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2596         if (!fdq) {
2597                 DRV_LOG(WARNING,
2598                         "port %u cannot allocate memory for drop queue",
2599                         dev->data->port_id);
2600                 rte_errno = ENOMEM;
2601                 return -rte_errno;
2602         }
2603         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2604         if (!fdq->cq) {
2605                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2606                         dev->data->port_id);
2607                 rte_errno = errno;
2608                 goto error;
2609         }
2610         fdq->wq = mlx5_glue->create_wq
2611                 (priv->ctx,
2612                  &(struct ibv_wq_init_attr){
2613                         .wq_type = IBV_WQT_RQ,
2614                         .max_wr = 1,
2615                         .max_sge = 1,
2616                         .pd = priv->pd,
2617                         .cq = fdq->cq,
2618                  });
2619         if (!fdq->wq) {
2620                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2621                         dev->data->port_id);
2622                 rte_errno = errno;
2623                 goto error;
2624         }
2625         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2626                 (priv->ctx,
2627                  &(struct ibv_rwq_ind_table_init_attr){
2628                         .log_ind_tbl_size = 0,
2629                         .ind_tbl = &fdq->wq,
2630                         .comp_mask = 0,
2631                  });
2632         if (!fdq->ind_table) {
2633                 DRV_LOG(WARNING,
2634                         "port %u cannot allocate indirection table for drop"
2635                         " queue",
2636                         dev->data->port_id);
2637                 rte_errno = errno;
2638                 goto error;
2639         }
2640         fdq->qp = mlx5_glue->create_qp_ex
2641                 (priv->ctx,
2642                  &(struct ibv_qp_init_attr_ex){
2643                         .qp_type = IBV_QPT_RAW_PACKET,
2644                         .comp_mask =
2645                                 IBV_QP_INIT_ATTR_PD |
2646                                 IBV_QP_INIT_ATTR_IND_TABLE |
2647                                 IBV_QP_INIT_ATTR_RX_HASH,
2648                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2649                                 .rx_hash_function =
2650                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2651                                 .rx_hash_key_len = rss_hash_default_key_len,
2652                                 .rx_hash_key = rss_hash_default_key,
2653                                 .rx_hash_fields_mask = 0,
2654                                 },
2655                         .rwq_ind_tbl = fdq->ind_table,
2656                         .pd = priv->pd
2657                  });
2658         if (!fdq->qp) {
2659                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2660                         dev->data->port_id);
2661                 rte_errno = errno;
2662                 goto error;
2663         }
2664         priv->flow_drop_queue = fdq;
2665         return 0;
2666 error:
2667         if (fdq->qp)
2668                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2669         if (fdq->ind_table)
2670                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2671         if (fdq->wq)
2672                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2673         if (fdq->cq)
2674                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2675         if (fdq)
2676                 rte_free(fdq);
2677         priv->flow_drop_queue = NULL;
2678         return -rte_errno;
2679 }
2680
2681 /**
2682  * Delete drop queue.
2683  *
2684  * @param dev
2685  *   Pointer to Ethernet device.
2686  */
2687 void
2688 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2689 {
2690         struct priv *priv = dev->data->dev_private;
2691         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2692
2693         if (!fdq)
2694                 return;
2695         if (fdq->qp)
2696                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2697         if (fdq->ind_table)
2698                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2699         if (fdq->wq)
2700                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2701         if (fdq->cq)
2702                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2703         rte_free(fdq);
2704         priv->flow_drop_queue = NULL;
2705 }
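
/*
 * Usage sketch (illustration only): the drop queue must exist for the whole
 * time flows may be (re)applied, e.g. around a start sequence.  The caller
 * name and error handling are assumptions.
 *
 *      static int
 *      port_start_flows(struct rte_eth_dev *dev)
 *      {
 *              struct priv *priv = dev->data->dev_private;
 *              int ret;
 *
 *              ret = mlx5_flow_create_drop_queue(dev);
 *              if (ret)
 *                      return ret;
 *              ret = mlx5_flow_start(dev, &priv->flows);
 *              if (ret) {
 *                      mlx5_flow_stop(dev, &priv->flows);
 *                      mlx5_flow_delete_drop_queue(dev);
 *              }
 *              return ret;
 *      }
 */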
2706
2707 /**
2708  * Remove all flows.
2709  *
2710  * @param dev
2711  *   Pointer to Ethernet device.
2712  * @param list
2713  *   Pointer to a TAILQ flow list.
2714  */
2715 void
2716 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2717 {
2718         struct priv *priv = dev->data->dev_private;
2719         struct rte_flow *flow;
2720         unsigned int i;
2721
2722         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2723                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2724
2725                 if (flow->drop) {
2726                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2727                                 continue;
2728                         claim_zero(mlx5_glue->destroy_flow
2729                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2730                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2731                         DRV_LOG(DEBUG, "port %u flow %p removed",
2732                                 dev->data->port_id, (void *)flow);
2733                         /* Next flow. */
2734                         continue;
2735                 }
2736                 /* Verify the flow has not already been cleaned. */
2737                 for (i = 0; i != hash_rxq_init_n; ++i) {
2738                         if (!flow->frxq[i].ibv_flow)
2739                                 continue;
2740                         /*
2741                          * Keep a reference to the indirection table: it
2742                          * is needed below to clear the mark flag on the
2743                          * Rx queues, and saving it here avoids walking
2744                          * the hash Rx queues in another loop.
2745                          */
2746                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2747                         break;
2748                 }
2749                 if (i == hash_rxq_init_n)
2750                         return;
2751                 if (flow->mark) {
2752                         assert(ind_tbl);
2753                         for (i = 0; i != ind_tbl->queues_n; ++i)
2754                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2755                 }
2756                 for (i = 0; i != hash_rxq_init_n; ++i) {
2757                         if (!flow->frxq[i].ibv_flow)
2758                                 continue;
2759                         claim_zero(mlx5_glue->destroy_flow
2760                                    (flow->frxq[i].ibv_flow));
2761                         flow->frxq[i].ibv_flow = NULL;
2762                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2763                         flow->frxq[i].hrxq = NULL;
2764                 }
2765                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2766                         (void *)flow);
2767         }
2768         /* Cleanup Rx queue tunnel info. */
2769         for (i = 0; i != priv->rxqs_n; ++i) {
2770                 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2771                 struct mlx5_rxq_ctrl *rxq_ctrl =
2772                         container_of(q, struct mlx5_rxq_ctrl, rxq);
2773
2774                 if (!q)
2775                         continue;
2776                 memset((void *)rxq_ctrl->tunnel_types, 0,
2777                        sizeof(rxq_ctrl->tunnel_types));
2778                 q->tunnel = 0;
2779         }
2780 }
2781
2782 /**
2783  * Add all flows.
2784  *
2785  * @param dev
2786  *   Pointer to Ethernet device.
2787  * @param list
2788  *   Pointer to a TAILQ flow list.
2789  *
2790  * @return
2791  *   0 on success, a negative errno value otherwise and rte_errno is set.
2792  */
2793 int
2794 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2795 {
2796         struct priv *priv = dev->data->dev_private;
2797         struct rte_flow *flow;
2798
2799         TAILQ_FOREACH(flow, list, next) {
2800                 unsigned int i;
2801
2802                 if (flow->drop) {
2803                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2804                                 mlx5_glue->create_flow
2805                                 (priv->flow_drop_queue->qp,
2806                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2807                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2808                                 DRV_LOG(DEBUG,
2809                                         "port %u flow %p cannot be applied",
2810                                         dev->data->port_id, (void *)flow);
2811                                 rte_errno = EINVAL;
2812                                 return -rte_errno;
2813                         }
2814                         DRV_LOG(DEBUG, "port %u flow %p applied",
2815                                 dev->data->port_id, (void *)flow);
2816                         /* Next flow. */
2817                         continue;
2818                 }
2819                 for (i = 0; i != hash_rxq_init_n; ++i) {
2820                         if (!flow->frxq[i].ibv_attr)
2821                                 continue;
2822                         flow->frxq[i].hrxq =
2823                                 mlx5_hrxq_get(dev, flow->rss_conf.key,
2824                                               flow->rss_conf.key_len,
2825                                               flow->frxq[i].hash_fields,
2826                                               flow->rss_conf.queue,
2827                                               flow->rss_conf.queue_num,
2828                                               flow->tunnel,
2829                                               flow->rss_conf.level);
2830                         if (flow->frxq[i].hrxq)
2831                                 goto flow_create;
2832                         flow->frxq[i].hrxq =
2833                                 mlx5_hrxq_new(dev, flow->rss_conf.key,
2834                                               flow->rss_conf.key_len,
2835                                               flow->frxq[i].hash_fields,
2836                                               flow->rss_conf.queue,
2837                                               flow->rss_conf.queue_num,
2838                                               flow->tunnel,
2839                                               flow->rss_conf.level);
2840                         if (!flow->frxq[i].hrxq) {
2841                                 DRV_LOG(DEBUG,
2842                                         "port %u flow %p cannot create hash"
2843                                         " rxq",
2844                                         dev->data->port_id, (void *)flow);
2845                                 rte_errno = EINVAL;
2846                                 return -rte_errno;
2847                         }
2848 flow_create:
2849                         mlx5_flow_dump(dev, flow, i);
2850                         flow->frxq[i].ibv_flow =
2851                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2852                                                        flow->frxq[i].ibv_attr);
2853                         if (!flow->frxq[i].ibv_flow) {
2854                                 DRV_LOG(DEBUG,
2855                                         "port %u flow %p type %u cannot be"
2856                                         " applied",
2857                                         dev->data->port_id, (void *)flow, i);
2858                                 rte_errno = EINVAL;
2859                                 return -rte_errno;
2860                         }
2861                 }
2862                 mlx5_flow_create_update_rxqs(dev, flow);
2863         }
2864         return 0;
2865 }
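
/*
 * Usage sketch (illustration only): re-applying all flows after an Rx queue
 * reconfiguration by removing and re-creating the Verbs objects.  The
 * trigger and the surrounding code are assumptions.
 *
 *      mlx5_flow_stop(dev, &priv->flows);
 *      ... reconfigure Rx queues ...
 *      if (mlx5_flow_start(dev, &priv->flows))
 *              DRV_LOG(ERR, "port %u cannot re-apply flows",
 *                      dev->data->port_id);
 */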
2866
2867 /**
2868  * Verify the flow list is empty.
2869  *
2870  * @param dev
2871  *   Pointer to Ethernet device.
2872  *
2873  * @return The number of flows not released.
2874  */
2875 int
2876 mlx5_flow_verify(struct rte_eth_dev *dev)
2877 {
2878         struct priv *priv = dev->data->dev_private;
2879         struct rte_flow *flow;
2880         int ret = 0;
2881
2882         TAILQ_FOREACH(flow, &priv->flows, next) {
2883                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2884                         dev->data->port_id, (void *)flow);
2885                 ++ret;
2886         }
2887         return ret;
2888 }
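
/*
 * Usage sketch (illustration only): reporting leaked flows when closing the
 * port.  The surrounding close logic is an assumption.
 *
 *      int remaining = mlx5_flow_verify(dev);
 *
 *      if (remaining)
 *              DRV_LOG(WARNING, "port %u some flows still remain (%d)",
 *                      dev->data->port_id, remaining);
 */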
2889
2890 /**
2891  * Enable a control flow configured from the control plane.
2892  *
2893  * @param dev
2894  *   Pointer to Ethernet device.
2895  * @param eth_spec
2896  *   An Ethernet flow spec to apply.
2897  * @param eth_mask
2898  *   An Ethernet flow mask to apply.
2899  * @param vlan_spec
2900  *   A VLAN flow spec to apply.
2901  * @param vlan_mask
2902  *   A VLAN flow mask to apply.
2903  *
2904  * @return
2905  *   0 on success, a negative errno value otherwise and rte_errno is set.
2906  */
2907 int
2908 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2909                     struct rte_flow_item_eth *eth_spec,
2910                     struct rte_flow_item_eth *eth_mask,
2911                     struct rte_flow_item_vlan *vlan_spec,
2912                     struct rte_flow_item_vlan *vlan_mask)
2913 {
2914         struct priv *priv = dev->data->dev_private;
2915         const struct rte_flow_attr attr = {
2916                 .ingress = 1,
2917                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2918         };
2919         struct rte_flow_item items[] = {
2920                 {
2921                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2922                         .spec = eth_spec,
2923                         .last = NULL,
2924                         .mask = eth_mask,
2925                 },
2926                 {
2927                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2928                                 RTE_FLOW_ITEM_TYPE_END,
2929                         .spec = vlan_spec,
2930                         .last = NULL,
2931                         .mask = vlan_mask,
2932                 },
2933                 {
2934                         .type = RTE_FLOW_ITEM_TYPE_END,
2935                 },
2936         };
2937         uint16_t queue[priv->reta_idx_n];
2938         struct rte_flow_action_rss action_rss = {
2939                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2940                 .level = 0,
2941                 .types = priv->rss_conf.rss_hf,
2942                 .key_len = priv->rss_conf.rss_key_len,
2943                 .queue_num = priv->reta_idx_n,
2944                 .key = priv->rss_conf.rss_key,
2945                 .queue = queue,
2946         };
2947         struct rte_flow_action actions[] = {
2948                 {
2949                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2950                         .conf = &action_rss,
2951                 },
2952                 {
2953                         .type = RTE_FLOW_ACTION_TYPE_END,
2954                 },
2955         };
2956         struct rte_flow *flow;
2957         struct rte_flow_error error;
2958         unsigned int i;
2959
2960         if (!priv->reta_idx_n) {
2961                 rte_errno = EINVAL;
2962                 return -rte_errno;
2963         }
2964         for (i = 0; i != priv->reta_idx_n; ++i)
2965                 queue[i] = (*priv->reta_idx)[i];
2966         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2967                                      actions, &error);
2968         if (!flow)
2969                 return -rte_errno;
2970         return 0;
2971 }
2972
2973 /**
2974  * Enable a control flow configured from the control plane.
2975  *
2976  * @param dev
2977  *   Pointer to Ethernet device.
2978  * @param eth_spec
2979  *   An Ethernet flow spec to apply.
2980  * @param eth_mask
2981  *   An Ethernet flow mask to apply.
2982  *
2983  * @return
2984  *   0 on success, a negative errno value otherwise and rte_errno is set.
2985  */
2986 int
2987 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2988                struct rte_flow_item_eth *eth_spec,
2989                struct rte_flow_item_eth *eth_mask)
2990 {
2991         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2992 }
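
/*
 * Usage sketch (illustration only): enabling a control flow that matches
 * broadcast traffic, similar to what the traffic-enable path does.  The
 * exact spec values are assumptions.
 *
 *      struct rte_flow_item_eth bcast = {
 *              .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *      };
 *
 *      if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *              DRV_LOG(ERR, "port %u cannot enable broadcast flow",
 *                      dev->data->port_id);
 */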
2993
2994 /**
2995  * Destroy a flow.
2996  *
2997  * @see rte_flow_destroy()
2998  * @see rte_flow_ops
2999  */
3000 int
3001 mlx5_flow_destroy(struct rte_eth_dev *dev,
3002                   struct rte_flow *flow,
3003                   struct rte_flow_error *error __rte_unused)
3004 {
3005         struct priv *priv = dev->data->dev_private;
3006
3007         mlx5_flow_list_destroy(dev, &priv->flows, flow);
3008         return 0;
3009 }
3010
3011 /**
3012  * Destroy all flows.
3013  *
3014  * @see rte_flow_flush()
3015  * @see rte_flow_ops
3016  */
3017 int
3018 mlx5_flow_flush(struct rte_eth_dev *dev,
3019                 struct rte_flow_error *error __rte_unused)
3020 {
3021         struct priv *priv = dev->data->dev_private;
3022
3023         mlx5_flow_list_flush(dev, &priv->flows);
3024         return 0;
3025 }
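
/*
 * Usage sketch (illustration only): how an application reaches these
 * callbacks through the generic rte_flow API.  port_id and the error
 * handling are assumptions.
 *
 *      struct rte_flow_error error;
 *
 *      if (rte_flow_flush(port_id, &error))
 *              printf("cannot flush flows: %s\n",
 *                     error.message ? error.message : "(no message)");
 */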
3026
3027 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3028 /**
3029  * Query flow counter.
3030  *
3031  * @param cs
3032  *   The counter set to query.
3033  * @param counter_stats
3034  *   Accumulated counter statistics; query_count receives the delta.
3035  *
3036  * @return
3037  *   0 on success, a negative errno value otherwise and rte_errno is set.
3038  */
3039 static int
3040 mlx5_flow_query_count(struct ibv_counter_set *cs,
3041                       struct mlx5_flow_counter_stats *counter_stats,
3042                       struct rte_flow_query_count *query_count,
3043                       struct rte_flow_error *error)
3044 {
3045         uint64_t counters[2];
3046         struct ibv_query_counter_set_attr query_cs_attr = {
3047                 .cs = cs,
3048                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3049         };
3050         struct ibv_counter_set_data query_out = {
3051                 .out = counters,
3052                 .outlen = 2 * sizeof(uint64_t),
3053         };
3054         int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3055
3056         if (err)
3057                 return rte_flow_error_set(error, err,
3058                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3059                                           NULL,
3060                                           "cannot read counter");
3061         query_count->hits_set = 1;
3062         query_count->bytes_set = 1;
3063         query_count->hits = counters[0] - counter_stats->hits;
3064         query_count->bytes = counters[1] - counter_stats->bytes;
3065         if (query_count->reset) {
3066                 counter_stats->hits = counters[0];
3067                 counter_stats->bytes = counters[1];
3068         }
3069         return 0;
3070 }
3071
3072 /**
3073  * Query a flow.
3074  *
3075  * @see rte_flow_query()
3076  * @see rte_flow_ops
3077  */
3078 int
3079 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3080                 struct rte_flow *flow,
3081                 enum rte_flow_action_type action __rte_unused,
3082                 void *data,
3083                 struct rte_flow_error *error)
3084 {
3085         if (flow->cs) {
3086                 int ret;
3087
3088                 ret = mlx5_flow_query_count(flow->cs,
3089                                             &flow->counter_stats,
3090                                             (struct rte_flow_query_count *)data,
3091                                             error);
3092                 if (ret)
3093                         return ret;
3094         } else {
3095                 return rte_flow_error_set(error, EINVAL,
3096                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3097                                           NULL,
3098                                           "no counter found for flow");
3099         }
3100         return 0;
3101 }
3102 #endif
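
/*
 * Usage sketch (illustration only, requires counter set support in
 * libibverbs): querying the COUNT action of a flow from an application.
 * The flow handle and port_id are assumptions.
 *
 *      struct rte_flow_query_count count = { .reset = 1 };
 *      struct rte_flow_error error;
 *
 *      if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                          &count, &error) && count.hits_set)
 *              printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                     count.hits, count.bytes);
 */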
3103
3104 /**
3105  * Isolated mode.
3106  *
3107  * @see rte_flow_isolate()
3108  * @see rte_flow_ops
3109  */
3110 int
3111 mlx5_flow_isolate(struct rte_eth_dev *dev,
3112                   int enable,
3113                   struct rte_flow_error *error)
3114 {
3115         struct priv *priv = dev->data->dev_private;
3116
3117         if (dev->data->dev_started) {
3118                 rte_flow_error_set(error, EBUSY,
3119                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3120                                    NULL,
3121                                    "port must be stopped first");
3122                 return -rte_errno;
3123         }
3124         priv->isolated = !!enable;
3125         if (enable)
3126                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3127         else
3128                 priv->dev->dev_ops = &mlx5_dev_ops;
3129         return 0;
3130 }
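
/*
 * Usage sketch (illustration only): isolated mode must be selected before
 * starting the port, as enforced above.  The application-side call is an
 * assumption.
 *
 *      struct rte_flow_error error;
 *
 *      if (rte_flow_isolate(port_id, 1, &error))
 *              printf("cannot enter isolated mode: %s\n",
 *                     error.message ? error.message : "(no message)");
 *      rte_eth_dev_start(port_id);
 */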
3131
3132 /**
3133  * Convert a flow director filter to a generic flow.
3134  *
3135  * @param dev
3136  *   Pointer to Ethernet device.
3137  * @param fdir_filter
3138  *   Flow director filter to add.
3139  * @param attributes
3140  *   Generic flow parameters structure.
3141  *
3142  * @return
3143  *   0 on success, a negative errno value otherwise and rte_errno is set.
3144  */
3145 static int
3146 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3147                          const struct rte_eth_fdir_filter *fdir_filter,
3148                          struct mlx5_fdir *attributes)
3149 {
3150         struct priv *priv = dev->data->dev_private;
3151         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3152         const struct rte_eth_fdir_masks *mask =
3153                 &dev->data->dev_conf.fdir_conf.mask;
3154
3155         /* Validate queue number. */
3156         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3157                 DRV_LOG(ERR, "port %u invalid queue number %d",
3158                         dev->data->port_id, fdir_filter->action.rx_queue);
3159                 rte_errno = EINVAL;
3160                 return -rte_errno;
3161         }
3162         attributes->attr.ingress = 1;
3163         attributes->items[0] = (struct rte_flow_item) {
3164                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3165                 .spec = &attributes->l2,
3166                 .mask = &attributes->l2_mask,
3167         };
3168         switch (fdir_filter->action.behavior) {
3169         case RTE_ETH_FDIR_ACCEPT:
3170                 attributes->actions[0] = (struct rte_flow_action){
3171                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3172                         .conf = &attributes->queue,
3173                 };
3174                 break;
3175         case RTE_ETH_FDIR_REJECT:
3176                 attributes->actions[0] = (struct rte_flow_action){
3177                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3178                 };
3179                 break;
3180         default:
3181                 DRV_LOG(ERR, "port %u invalid behavior %d",
3182                         dev->data->port_id,
3183                         fdir_filter->action.behavior);
3184                 rte_errno = ENOTSUP;
3185                 return -rte_errno;
3186         }
3187         attributes->queue.index = fdir_filter->action.rx_queue;
3188         /* Handle L3. */
3189         switch (fdir_filter->input.flow_type) {
3190         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3191         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3192         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3193                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3194                         .src_addr = input->flow.ip4_flow.src_ip,
3195                         .dst_addr = input->flow.ip4_flow.dst_ip,
3196                         .time_to_live = input->flow.ip4_flow.ttl,
3197                         .type_of_service = input->flow.ip4_flow.tos,
3198                         .next_proto_id = input->flow.ip4_flow.proto,
3199                 };
3200                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3201                         .src_addr = mask->ipv4_mask.src_ip,
3202                         .dst_addr = mask->ipv4_mask.dst_ip,
3203                         .time_to_live = mask->ipv4_mask.ttl,
3204                         .type_of_service = mask->ipv4_mask.tos,
3205                         .next_proto_id = mask->ipv4_mask.proto,
3206                 };
3207                 attributes->items[1] = (struct rte_flow_item){
3208                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3209                         .spec = &attributes->l3,
3210                         .mask = &attributes->l3_mask,
3211                 };
3212                 break;
3213         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3214         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3215         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3216                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3217                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3218                         .proto = input->flow.ipv6_flow.proto,
3219                 };
3220
3221                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3222                        input->flow.ipv6_flow.src_ip,
3223                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3224                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3225                        input->flow.ipv6_flow.dst_ip,
3226                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3227                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3228                        mask->ipv6_mask.src_ip,
3229                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3230                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3231                        mask->ipv6_mask.dst_ip,
3232                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3233                 attributes->items[1] = (struct rte_flow_item){
3234                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3235                         .spec = &attributes->l3,
3236                         .mask = &attributes->l3_mask,
3237                 };
3238                 break;
3239         default:
3240                 DRV_LOG(ERR, "port %u invalid flow type %d",
3241                         dev->data->port_id, fdir_filter->input.flow_type);
3242                 rte_errno = ENOTSUP;
3243                 return -rte_errno;
3244         }
3245         /* Handle L4. */
3246         switch (fdir_filter->input.flow_type) {
3247         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3248                 attributes->l4.udp.hdr = (struct udp_hdr){
3249                         .src_port = input->flow.udp4_flow.src_port,
3250                         .dst_port = input->flow.udp4_flow.dst_port,
3251                 };
3252                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3253                         .src_port = mask->src_port_mask,
3254                         .dst_port = mask->dst_port_mask,
3255                 };
3256                 attributes->items[2] = (struct rte_flow_item){
3257                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3258                         .spec = &attributes->l4,
3259                         .mask = &attributes->l4_mask,
3260                 };
3261                 break;
3262         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3263                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3264                         .src_port = input->flow.tcp4_flow.src_port,
3265                         .dst_port = input->flow.tcp4_flow.dst_port,
3266                 };
3267                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3268                         .src_port = mask->src_port_mask,
3269                         .dst_port = mask->dst_port_mask,
3270                 };
3271                 attributes->items[2] = (struct rte_flow_item){
3272                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3273                         .spec = &attributes->l4,
3274                         .mask = &attributes->l4_mask,
3275                 };
3276                 break;
3277         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3278                 attributes->l4.udp.hdr = (struct udp_hdr){
3279                         .src_port = input->flow.udp6_flow.src_port,
3280                         .dst_port = input->flow.udp6_flow.dst_port,
3281                 };
3282                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3283                         .src_port = mask->src_port_mask,
3284                         .dst_port = mask->dst_port_mask,
3285                 };
3286                 attributes->items[2] = (struct rte_flow_item){
3287                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3288                         .spec = &attributes->l4,
3289                         .mask = &attributes->l4_mask,
3290                 };
3291                 break;
3292         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3293                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3294                         .src_port = input->flow.tcp6_flow.src_port,
3295                         .dst_port = input->flow.tcp6_flow.dst_port,
3296                 };
3297                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3298                         .src_port = mask->src_port_mask,
3299                         .dst_port = mask->dst_port_mask,
3300                 };
3301                 attributes->items[2] = (struct rte_flow_item){
3302                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3303                         .spec = &attributes->l4,
3304                         .mask = &attributes->l4_mask,
3305                 };
3306                 break;
3307         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3308         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3309                 break;
3310         default:
3311                 DRV_LOG(ERR, "port %u invalid flow type %d",
3312                         dev->data->port_id, fdir_filter->input.flow_type);
3313                 rte_errno = ENOTSUP;
3314                 return -rte_errno;
3315         }
3316         return 0;
3317 }
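
/*
 * Usage sketch (illustration only): a flow director filter that the
 * conversion above turns into ETH + IPV4 + UDP items with a QUEUE action.
 * Addresses, ports and the queue index are assumptions.
 *
 *      struct rte_eth_fdir_filter fdir = {
 *              .input = {
 *                      .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                      .flow.udp4_flow = {
 *                              .ip.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
 *                              .dst_port = rte_cpu_to_be_16(4789),
 *                      },
 *              },
 *              .action = {
 *                      .rx_queue = 3,
 *                      .behavior = RTE_ETH_FDIR_ACCEPT,
 *              },
 *      };
 */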
3318
3319 /**
3320  * Add new flow director filter and store it in list.
3321  *
3322  * @param dev
3323  *   Pointer to Ethernet device.
3324  * @param fdir_filter
3325  *   Flow director filter to add.
3326  *
3327  * @return
3328  *   0 on success, a negative errno value otherwise and rte_errno is set.
3329  */
3330 static int
3331 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3332                      const struct rte_eth_fdir_filter *fdir_filter)
3333 {
3334         struct priv *priv = dev->data->dev_private;
3335         struct mlx5_fdir attributes = {
3336                 .attr.group = 0,
3337                 .l2_mask = {
3338                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3339                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3340                         .type = 0,
3341                 },
3342         };
3343         struct mlx5_flow_parse parser = {
3344                 .layer = HASH_RXQ_ETH,
3345         };
3346         struct rte_flow_error error;
3347         struct rte_flow *flow;
3348         int ret;
3349
3350         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3351         if (ret)
3352                 return ret;
3353         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3354                                 attributes.actions, &error, &parser);
3355         if (ret)
3356                 return ret;
3357         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3358                                      attributes.items, attributes.actions,
3359                                      &error);
3360         if (flow) {
3361                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3362                         (void *)flow);
3363                 return 0;
3364         }
3365         return -rte_errno;
3366 }
3367
3368 /**
3369  * Delete specific filter.
3370  *
3371  * @param dev
3372  *   Pointer to Ethernet device.
3373  * @param fdir_filter
3374  *   Filter to be deleted.
3375  *
3376  * @return
3377  *   0 on success, a negative errno value otherwise and rte_errno is set.
3378  */
3379 static int
3380 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3381                         const struct rte_eth_fdir_filter *fdir_filter)
3382 {
3383         struct priv *priv = dev->data->dev_private;
3384         struct mlx5_fdir attributes = {
3385                 .attr.group = 0,
3386         };
3387         struct mlx5_flow_parse parser = {
3388                 .create = 1,
3389                 .layer = HASH_RXQ_ETH,
3390         };
3391         struct rte_flow_error error;
3392         struct rte_flow *flow;
3393         unsigned int i;
3394         int ret;
3395
3396         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3397         if (ret)
3398                 return ret;
3399         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3400                                 attributes.actions, &error, &parser);
3401         if (ret)
3402                 goto exit;
3403         /*
3404          * Special case for the drop action, which is only added to the
3405          * specifications when the flow is created.  Here the drop
3406          * specification is missing and has to be appended to match it.
3407          */
3408         if (parser.drop) {
3409                 struct ibv_flow_spec_action_drop *drop;
3410
3411                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3412                                 parser.queue[HASH_RXQ_ETH].offset);
3413                 *drop = (struct ibv_flow_spec_action_drop){
3414                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3415                         .size = sizeof(struct ibv_flow_spec_action_drop),
3416                 };
3417                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3418         }
3419         TAILQ_FOREACH(flow, &priv->flows, next) {
3420                 struct ibv_flow_attr *attr;
3421                 struct ibv_spec_header *attr_h;
3422                 void *spec;
3423                 struct ibv_flow_attr *flow_attr;
3424                 struct ibv_spec_header *flow_h;
3425                 void *flow_spec;
3426                 unsigned int specs_n;
3427
3428                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3429                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3430                 /* Compare first the attributes. */
3431                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3432                         continue;
3433                 if (attr->num_of_specs == 0)
3434                         continue;
3435                 spec = (void *)((uintptr_t)attr +
3436                                 sizeof(struct ibv_flow_attr));
3437                 flow_spec = (void *)((uintptr_t)flow_attr +
3438                                      sizeof(struct ibv_flow_attr));
3439                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3440                 for (i = 0; i != specs_n; ++i) {
3441                         attr_h = spec;
3442                         flow_h = flow_spec;
3443                         if (memcmp(spec, flow_spec,
3444                                    RTE_MIN(attr_h->size, flow_h->size)))
3445                                 goto wrong_flow;
3446                         spec = (void *)((uintptr_t)spec + attr_h->size);
3447                         flow_spec = (void *)((uintptr_t)flow_spec +
3448                                              flow_h->size);
3449                 }
3450                 /* At this point, the flow matches. */
3451                 break;
3452 wrong_flow:
3453                 /* The flow does not match. */
3454                 continue;
3455         }
3456         ret = rte_errno; /* Save rte_errno before cleanup. */
3457         if (flow)
3458                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3459 exit:
3460         for (i = 0; i != hash_rxq_init_n; ++i) {
3461                 if (parser.queue[i].ibv_attr)
3462                         rte_free(parser.queue[i].ibv_attr);
3463         }
3464         rte_errno = ret; /* Restore rte_errno. */
3465         return -rte_errno;
3466 }
3467
3468 /**
3469  * Update queue for specific filter.
3470  *
3471  * @param dev
3472  *   Pointer to Ethernet device.
3473  * @param fdir_filter
3474  *   Filter to be updated.
3475  *
3476  * @return
3477  *   0 on success, a negative errno value otherwise and rte_errno is set.
3478  */
3479 static int
3480 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3481                         const struct rte_eth_fdir_filter *fdir_filter)
3482 {
3483         int ret;
3484
3485         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3486         if (ret)
3487                 return ret;
3488         return mlx5_fdir_filter_add(dev, fdir_filter);
3489 }
3490
3491 /**
3492  * Flush all filters.
3493  *
3494  * @param dev
3495  *   Pointer to Ethernet device.
3496  */
3497 static void
3498 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3499 {
3500         struct priv *priv = dev->data->dev_private;
3501
3502         mlx5_flow_list_flush(dev, &priv->flows);
3503 }
3504
3505 /**
3506  * Get flow director information.
3507  *
3508  * @param dev
3509  *   Pointer to Ethernet device.
3510  * @param[out] fdir_info
3511  *   Resulting flow director information.
3512  */
3513 static void
3514 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3515 {
3516         struct priv *priv = dev->data->dev_private;
3517         struct rte_eth_fdir_masks *mask =
3518                 &priv->dev->data->dev_conf.fdir_conf.mask;
3519
3520         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3521         fdir_info->guarant_spc = 0;
3522         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3523         fdir_info->max_flexpayload = 0;
3524         fdir_info->flow_types_mask[0] = 0;
3525         fdir_info->flex_payload_unit = 0;
3526         fdir_info->max_flex_payload_segment_num = 0;
3527         fdir_info->flex_payload_limit = 0;
3528         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3529 }
3530
3531 /**
3532  * Deal with flow director operations.
3533  *
3534  * @param dev
3535  *   Pointer to Ethernet device.
3536  * @param filter_op
3537  *   Operation to perform.
3538  * @param arg
3539  *   Pointer to operation-specific structure.
3540  *
3541  * @return
3542  *   0 on success, a negative errno value otherwise and rte_errno is set.
3543  */
3544 static int
3545 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3546                     void *arg)
3547 {
3548         struct priv *priv = dev->data->dev_private;
3549         enum rte_fdir_mode fdir_mode =
3550                 priv->dev->data->dev_conf.fdir_conf.mode;
3551
3552         if (filter_op == RTE_ETH_FILTER_NOP)
3553                 return 0;
3554         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3555             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3556                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3557                         dev->data->port_id, fdir_mode);
3558                 rte_errno = EINVAL;
3559                 return -rte_errno;
3560         }
3561         switch (filter_op) {
3562         case RTE_ETH_FILTER_ADD:
3563                 return mlx5_fdir_filter_add(dev, arg);
3564         case RTE_ETH_FILTER_UPDATE:
3565                 return mlx5_fdir_filter_update(dev, arg);
3566         case RTE_ETH_FILTER_DELETE:
3567                 return mlx5_fdir_filter_delete(dev, arg);
3568         case RTE_ETH_FILTER_FLUSH:
3569                 mlx5_fdir_filter_flush(dev);
3570                 break;
3571         case RTE_ETH_FILTER_INFO:
3572                 mlx5_fdir_info_get(dev, arg);
3573                 break;
3574         default:
3575                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3576                         dev->data->port_id, filter_op);
3577                 rte_errno = EINVAL;
3578                 return -rte_errno;
3579         }
3580         return 0;
3581 }
3582
3583 /**
3584  * Manage filter operations.
3585  *
3586  * @param dev
3587  *   Pointer to Ethernet device structure.
3588  * @param filter_type
3589  *   Filter type.
3590  * @param filter_op
3591  *   Operation to perform.
3592  * @param arg
3593  *   Pointer to operation-specific structure.
3594  *
3595  * @return
3596  *   0 on success, a negative errno value otherwise and rte_errno is set.
3597  */
3598 int
3599 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3600                      enum rte_filter_type filter_type,
3601                      enum rte_filter_op filter_op,
3602                      void *arg)
3603 {
3604         switch (filter_type) {
3605         case RTE_ETH_FILTER_GENERIC:
3606                 if (filter_op != RTE_ETH_FILTER_GET) {
3607                         rte_errno = EINVAL;
3608                         return -rte_errno;
3609                 }
3610                 *(const void **)arg = &mlx5_flow_ops;
3611                 return 0;
3612         case RTE_ETH_FILTER_FDIR:
3613                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3614         default:
3615                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3616                         dev->data->port_id, filter_type);
3617                 rte_errno = ENOTSUP;
3618                 return -rte_errno;
3619         }
3620         return 0;
3621 }
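
/*
 * Usage sketch (illustration only): generic flow requests are answered with
 * the rte_flow ops pointer, while flow director requests take the
 * RTE_ETH_FILTER_FDIR branch above.  port_id is an assumption.
 *
 *      const struct rte_flow_ops *ops = NULL;
 *
 *      rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                              RTE_ETH_FILTER_GET, &ops);
 */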
3622
3623 /**
3624  * Detect number of Verbs flow priorities supported.
3625  *
3626  * @param dev
3627  *   Pointer to Ethernet device.
3628  *
3629  * @return
3630  *   Number of supported Verbs flow priorities.
3631  */
3632 unsigned int
3633 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3634 {
3635         struct priv *priv = dev->data->dev_private;
3636         unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3637         struct {
3638                 struct ibv_flow_attr attr;
3639                 struct ibv_flow_spec_eth eth;
3640                 struct ibv_flow_spec_action_drop drop;
3641         } flow_attr = {
3642                 .attr = {
3643                         .num_of_specs = 2,
3644                 },
3645                 .eth = {
3646                         .type = IBV_FLOW_SPEC_ETH,
3647                         .size = sizeof(struct ibv_flow_spec_eth),
3648                 },
3649                 .drop = {
3650                         .size = sizeof(struct ibv_flow_spec_action_drop),
3651                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3652                 },
3653         };
3654         struct ibv_flow *flow;
3655
3656         do {
3657                 flow_attr.attr.priority = verb_priorities - 1;
3658                 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3659                                               &flow_attr.attr);
3660                 if (flow) {
3661                         claim_zero(mlx5_glue->destroy_flow(flow));
3662                         /* Try more priorities. */
3663                         verb_priorities *= 2;
3664                 } else {
3665                         /* Failed, restore the last working value. */
3666                         verb_priorities /= 2;
3667                         break;
3668                 }
3669         } while (1);
3670         DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3671                 " user flow priorities: %d",
3672                 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3673         return verb_priorities;
3674 }
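
/*
 * Usage sketch (illustration only): the probe above creates flows on the
 * drop queue QP, so it can only run after mlx5_flow_create_drop_queue().
 * Where the result is stored is an assumption.
 *
 *      if (mlx5_flow_create_drop_queue(dev))
 *              return -rte_errno;
 *      priv->config.max_verbs_prio = mlx5_get_max_verbs_prio(dev);
 */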