net/mlx5: allow flow tunnel ID 0 with outer pattern
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44         int dummy;
45 };
46 #endif
47
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54         struct rte_eth_dev *dev; /**< Ethernet device. */
55         struct mlx5_flow_parse *parser; /**< Parser context. */
56         struct rte_flow_error *error; /**< Error context. */
57 };
58
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61                      const void *default_mask,
62                      struct mlx5_flow_data *data);
63
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66                       const void *default_mask,
67                       struct mlx5_flow_data *data);
68
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71                       const void *default_mask,
72                       struct mlx5_flow_data *data);
73
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76                       const void *default_mask,
77                       struct mlx5_flow_data *data);
78
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81                      const void *default_mask,
82                      struct mlx5_flow_data *data);
83
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86                      const void *default_mask,
87                      struct mlx5_flow_data *data);
88
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91                        const void *default_mask,
92                        struct mlx5_flow_data *data);
93
94 static int
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96                            const void *default_mask,
97                            struct mlx5_flow_data *data);
98
99 static int
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101                      const void *default_mask,
102                      struct mlx5_flow_data *data);
103
104 struct mlx5_flow_parse;
105
106 static void
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
108                       unsigned int size);
109
110 static int
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
112
113 static int
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
115
116 /* Hash RX queue types. */
117 enum hash_rxq_type {
118         HASH_RXQ_TCPV4,
119         HASH_RXQ_UDPV4,
120         HASH_RXQ_IPV4,
121         HASH_RXQ_TCPV6,
122         HASH_RXQ_UDPV6,
123         HASH_RXQ_IPV6,
124         HASH_RXQ_ETH,
125         HASH_RXQ_TUNNEL,
126 };
127
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130         uint64_t hash_fields; /* Fields that participate in the hash. */
131         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132         unsigned int flow_priority; /* Flow priority to use. */
133         unsigned int ip_version; /* Internet protocol. */
134 };
135
136 /* Initialization data for hash RX queues. */
137 const struct hash_rxq_init hash_rxq_init[] = {
138         [HASH_RXQ_TCPV4] = {
139                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140                                 IBV_RX_HASH_DST_IPV4 |
141                                 IBV_RX_HASH_SRC_PORT_TCP |
142                                 IBV_RX_HASH_DST_PORT_TCP),
143                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
144                 .flow_priority = 0,
145                 .ip_version = MLX5_IPV4,
146         },
147         [HASH_RXQ_UDPV4] = {
148                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149                                 IBV_RX_HASH_DST_IPV4 |
150                                 IBV_RX_HASH_SRC_PORT_UDP |
151                                 IBV_RX_HASH_DST_PORT_UDP),
152                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
153                 .flow_priority = 0,
154                 .ip_version = MLX5_IPV4,
155         },
156         [HASH_RXQ_IPV4] = {
157                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158                                 IBV_RX_HASH_DST_IPV4),
159                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
160                                 ETH_RSS_FRAG_IPV4),
161                 .flow_priority = 1,
162                 .ip_version = MLX5_IPV4,
163         },
164         [HASH_RXQ_TCPV6] = {
165                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166                                 IBV_RX_HASH_DST_IPV6 |
167                                 IBV_RX_HASH_SRC_PORT_TCP |
168                                 IBV_RX_HASH_DST_PORT_TCP),
169                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
170                 .flow_priority = 0,
171                 .ip_version = MLX5_IPV6,
172         },
173         [HASH_RXQ_UDPV6] = {
174                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175                                 IBV_RX_HASH_DST_IPV6 |
176                                 IBV_RX_HASH_SRC_PORT_UDP |
177                                 IBV_RX_HASH_DST_PORT_UDP),
178                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
179                 .flow_priority = 0,
180                 .ip_version = MLX5_IPV6,
181         },
182         [HASH_RXQ_IPV6] = {
183                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184                                 IBV_RX_HASH_DST_IPV6),
185                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
186                                 ETH_RSS_FRAG_IPV6),
187                 .flow_priority = 1,
188                 .ip_version = MLX5_IPV6,
189         },
190         [HASH_RXQ_ETH] = {
191                 .hash_fields = 0,
192                 .dpdk_rss_hf = 0,
193                 .flow_priority = 2,
194         },
195 };
196
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
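/*
 * Note: HASH_RXQ_TUNNEL has no entry in hash_rxq_init[], so hash_rxq_init_n
 * only covers HASH_RXQ_TCPV4 through HASH_RXQ_ETH.
 */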
199
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202         uint64_t hits; /**< Number of packets matched by the rule. */
203         uint64_t bytes; /**< Number of bytes matched by the rule. */
204 };
205
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209         struct ibv_qp *qp; /**< Verbs queue pair. */
210         struct ibv_wq *wq; /**< Verbs work queue. */
211         struct ibv_cq *cq; /**< Verbs completion queue. */
212 };
213
214 /* Flows structures. */
215 struct mlx5_flow {
216         uint64_t hash_fields; /**< Fields that participate in the hash. */
217         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218         struct ibv_flow *ibv_flow; /**< Verbs flow. */
219         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
220 };
221
222 /* Drop flows structures. */
223 struct mlx5_flow_drop {
224         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
225         struct ibv_flow *ibv_flow; /**< Verbs flow. */
226 };
227
228 struct rte_flow {
229         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230         uint32_t mark:1; /**< Set if the flow is marked. */
231         uint32_t drop:1; /**< Drop queue. */
232         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
233         uint16_t (*queues)[]; /**< Queues indexes to use. */
234         uint8_t rss_key[40]; /**< copy of the RSS key. */
235         uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
236         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
237         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239         /**< Flow with Rx queue. */
240 };
241
242 /** Static initializer for items. */
243 #define ITEMS(...) \
244         (const enum rte_flow_item_type []){ \
245                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
246         }
247
248 #define IS_TUNNEL(type) ( \
249         (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250         (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251         (type) == RTE_FLOW_ITEM_TYPE_GRE)
252
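/* Map tunnel pattern item types to their RTE_PTYPE_TUNNEL_* packet type. */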
253 const uint32_t flow_ptype[] = {
254         [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255         [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256         [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
257 };
258
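/*
 * RTE_PTYPE_TUNNEL_* values occupy bits 12-15 of the packet type, so masking
 * and shifting right by 12 yields a small index suitable for ptype_ext[].
 */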
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
260
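/*
 * Tunnel packet type including the outer L4 type: VXLAN and VXLAN-GPE run
 * over UDP, GRE does not.
 */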
261 const uint32_t ptype_ext[] = {
262         [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
263                                               RTE_PTYPE_L4_UDP,
264         [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
265                                                   RTE_PTYPE_L4_UDP,
266         [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
267 };
268
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271         /** List of possible actions for these items. */
272         const enum rte_flow_action_type *const actions;
273         /** Bit-masks corresponding to the possibilities for the item. */
274         const void *mask;
275         /**
276          * Default bit-masks to use when item->mask is not provided. When
277          * \default_mask is also NULL, the fully supported bit-mask (\mask) is
278          * used instead.
279          */
280         const void *default_mask;
281         /** Bit-masks size in bytes. */
282         const unsigned int mask_sz;
283         /**
284          * Conversion function from rte_flow to NIC specific flow.
285          *
286          * @param item
287          *   rte_flow item to convert.
288          * @param default_mask
289          *   Default bit-masks to use when item->mask is not provided.
290          * @param data
291          *   Internal structure to store the conversion.
292          *
293          * @return
294          *   0 on success, a negative errno value otherwise and rte_errno is
295          *   set.
296          */
297         int (*convert)(const struct rte_flow_item *item,
298                        const void *default_mask,
299                        struct mlx5_flow_data *data);
300         /** Size in bytes of the destination structure. */
301         const unsigned int dst_sz;
302         /** List of possible following items.  */
303         const enum rte_flow_item_type *const items;
304 };
305
306 /** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308         RTE_FLOW_ACTION_TYPE_DROP,
309         RTE_FLOW_ACTION_TYPE_QUEUE,
310         RTE_FLOW_ACTION_TYPE_MARK,
311         RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313         RTE_FLOW_ACTION_TYPE_COUNT,
314 #endif
315         RTE_FLOW_ACTION_TYPE_END,
316 };
317
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320         [RTE_FLOW_ITEM_TYPE_END] = {
321                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322                                RTE_FLOW_ITEM_TYPE_VXLAN,
323                                RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324                                RTE_FLOW_ITEM_TYPE_GRE),
325         },
326         [RTE_FLOW_ITEM_TYPE_ETH] = {
327                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328                                RTE_FLOW_ITEM_TYPE_IPV4,
329                                RTE_FLOW_ITEM_TYPE_IPV6),
330                 .actions = valid_actions,
331                 .mask = &(const struct rte_flow_item_eth){
332                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
334                         .type = -1,
335                 },
336                 .default_mask = &rte_flow_item_eth_mask,
337                 .mask_sz = sizeof(struct rte_flow_item_eth),
338                 .convert = mlx5_flow_create_eth,
339                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
340         },
341         [RTE_FLOW_ITEM_TYPE_VLAN] = {
342                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343                                RTE_FLOW_ITEM_TYPE_IPV6),
344                 .actions = valid_actions,
345                 .mask = &(const struct rte_flow_item_vlan){
346                         .tci = -1,
347                         .inner_type = -1,
348                 },
349                 .default_mask = &rte_flow_item_vlan_mask,
350                 .mask_sz = sizeof(struct rte_flow_item_vlan),
351                 .convert = mlx5_flow_create_vlan,
352                 .dst_sz = 0,
353         },
354         [RTE_FLOW_ITEM_TYPE_IPV4] = {
355                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356                                RTE_FLOW_ITEM_TYPE_TCP,
357                                RTE_FLOW_ITEM_TYPE_GRE),
358                 .actions = valid_actions,
359                 .mask = &(const struct rte_flow_item_ipv4){
360                         .hdr = {
361                                 .src_addr = -1,
362                                 .dst_addr = -1,
363                                 .type_of_service = -1,
364                                 .next_proto_id = -1,
365                         },
366                 },
367                 .default_mask = &rte_flow_item_ipv4_mask,
368                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
369                 .convert = mlx5_flow_create_ipv4,
370                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
371         },
372         [RTE_FLOW_ITEM_TYPE_IPV6] = {
373                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374                                RTE_FLOW_ITEM_TYPE_TCP,
375                                RTE_FLOW_ITEM_TYPE_GRE),
376                 .actions = valid_actions,
377                 .mask = &(const struct rte_flow_item_ipv6){
378                         .hdr = {
379                                 .src_addr = {
380                                         0xff, 0xff, 0xff, 0xff,
381                                         0xff, 0xff, 0xff, 0xff,
382                                         0xff, 0xff, 0xff, 0xff,
383                                         0xff, 0xff, 0xff, 0xff,
384                                 },
385                                 .dst_addr = {
386                                         0xff, 0xff, 0xff, 0xff,
387                                         0xff, 0xff, 0xff, 0xff,
388                                         0xff, 0xff, 0xff, 0xff,
389                                         0xff, 0xff, 0xff, 0xff,
390                                 },
391                                 .vtc_flow = -1,
392                                 .proto = -1,
393                                 .hop_limits = -1,
394                         },
395                 },
396                 .default_mask = &rte_flow_item_ipv6_mask,
397                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
398                 .convert = mlx5_flow_create_ipv6,
399                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
400         },
401         [RTE_FLOW_ITEM_TYPE_UDP] = {
402                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403                                RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404                 .actions = valid_actions,
405                 .mask = &(const struct rte_flow_item_udp){
406                         .hdr = {
407                                 .src_port = -1,
408                                 .dst_port = -1,
409                         },
410                 },
411                 .default_mask = &rte_flow_item_udp_mask,
412                 .mask_sz = sizeof(struct rte_flow_item_udp),
413                 .convert = mlx5_flow_create_udp,
414                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
415         },
416         [RTE_FLOW_ITEM_TYPE_TCP] = {
417                 .actions = valid_actions,
418                 .mask = &(const struct rte_flow_item_tcp){
419                         .hdr = {
420                                 .src_port = -1,
421                                 .dst_port = -1,
422                         },
423                 },
424                 .default_mask = &rte_flow_item_tcp_mask,
425                 .mask_sz = sizeof(struct rte_flow_item_tcp),
426                 .convert = mlx5_flow_create_tcp,
427                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428         },
429         [RTE_FLOW_ITEM_TYPE_GRE] = {
430                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431                                RTE_FLOW_ITEM_TYPE_IPV4,
432                                RTE_FLOW_ITEM_TYPE_IPV6),
433                 .actions = valid_actions,
434                 .mask = &(const struct rte_flow_item_gre){
435                         .protocol = -1,
436                 },
437                 .default_mask = &rte_flow_item_gre_mask,
438                 .mask_sz = sizeof(struct rte_flow_item_gre),
439                 .convert = mlx5_flow_create_gre,
440                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
441         },
442         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
443                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444                                RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445                                RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446                 .actions = valid_actions,
447                 .mask = &(const struct rte_flow_item_vxlan){
448                         .vni = "\xff\xff\xff",
449                 },
450                 .default_mask = &rte_flow_item_vxlan_mask,
451                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
452                 .convert = mlx5_flow_create_vxlan,
453                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
454         },
455         [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457                                RTE_FLOW_ITEM_TYPE_IPV4,
458                                RTE_FLOW_ITEM_TYPE_IPV6),
459                 .actions = valid_actions,
460                 .mask = &(const struct rte_flow_item_vxlan_gpe){
461                         .vni = "\xff\xff\xff",
462                 },
463                 .default_mask = &rte_flow_item_vxlan_gpe_mask,
464                 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465                 .convert = mlx5_flow_create_vxlan_gpe,
466                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
467         },
468 };
469
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472         uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
473         uint32_t create:1;
474         /**< Whether resources should remain after a validate. */
475         uint32_t drop:1; /**< Target is a drop queue. */
476         uint32_t mark:1; /**< Mark is present in the flow. */
477         uint32_t count:1; /**< Count is present in the flow. */
478         uint32_t mark_id; /**< Mark identifier. */
479         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
480         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
481         uint8_t rss_key[40]; /**< copy of the RSS key. */
482         enum hash_rxq_type layer; /**< Last pattern layer detected. */
483         enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484         uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
485         struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
486         struct {
487                 struct ibv_flow_attr *ibv_attr;
488                 /**< Pointer to Verbs attributes. */
489                 unsigned int offset;
490                 /**< Current position or total size of the attribute. */
491                 uint64_t hash_fields; /**< Verbs hash fields. */
492         } queue[RTE_DIM(hash_rxq_init)];
493 };
494
495 static const struct rte_flow_ops mlx5_flow_ops = {
496         .validate = mlx5_flow_validate,
497         .create = mlx5_flow_create,
498         .destroy = mlx5_flow_destroy,
499         .flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
501         .query = mlx5_flow_query,
502 #else
503         .query = NULL,
504 #endif
505         .isolate = mlx5_flow_isolate,
506 };
507
508 /* Generic flow representation of an FDIR (flow director) request. */
509 struct mlx5_fdir {
510         struct rte_flow_attr attr;
511         struct rte_flow_action actions[2];
512         struct rte_flow_item items[4];
513         struct rte_flow_item_eth l2;
514         struct rte_flow_item_eth l2_mask;
515         union {
516                 struct rte_flow_item_ipv4 ipv4;
517                 struct rte_flow_item_ipv6 ipv6;
518         } l3;
519         union {
520                 struct rte_flow_item_ipv4 ipv4;
521                 struct rte_flow_item_ipv6 ipv6;
522         } l3_mask;
523         union {
524                 struct rte_flow_item_udp udp;
525                 struct rte_flow_item_tcp tcp;
526         } l4;
527         union {
528                 struct rte_flow_item_udp udp;
529                 struct rte_flow_item_tcp tcp;
530         } l4_mask;
531         struct rte_flow_action_queue queue;
532 };
533
534 /* Verbs specification header. */
535 struct ibv_spec_header {
536         enum ibv_flow_spec_type type;
537         uint16_t size;
538 };
539
540 /**
541  * Check support for a given item.
542  *
543  * @param[in] item
544  *   Item specification.
545  * @param[in] mask
546  *   Bit-masks covering supported fields to compare with spec, last and mask in
547  *   \item.
548  * @param size
549  *   Bit-Mask size in bytes.
550  *
551  * @return
552  *   0 on success, a negative errno value otherwise and rte_errno is set.
553  */
554 static int
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556                         const uint8_t *mask, unsigned int size)
557 {
558         if (!item->spec && (item->mask || item->last)) {
559                 rte_errno = EINVAL;
560                 return -rte_errno;
561         }
562         if (item->spec && !item->mask) {
563                 unsigned int i;
564                 const uint8_t *spec = item->spec;
565
566                 for (i = 0; i < size; ++i)
567                         if ((spec[i] | mask[i]) != mask[i]) {
568                                 rte_errno = EINVAL;
569                                 return -rte_errno;
570                         }
571         }
572         if (item->last && !item->mask) {
573                 unsigned int i;
574                 const uint8_t *spec = item->last;
575
576                 for (i = 0; i < size; ++i)
577                         if ((spec[i] | mask[i]) != mask[i]) {
578                                 rte_errno = EINVAL;
579                                 return -rte_errno;
580                         }
581         }
582         if (item->mask) {
583                 unsigned int i;
584                 const uint8_t *spec = item->mask;
585
586                 for (i = 0; i < size; ++i)
587                         if ((spec[i] | mask[i]) != mask[i]) {
588                                 rte_errno = EINVAL;
589                                 return -rte_errno;
590                         }
591         }
592         if (item->spec && item->last) {
593                 uint8_t spec[size];
594                 uint8_t last[size];
595                 const uint8_t *apply = mask;
596                 unsigned int i;
597                 int ret;
598
599                 if (item->mask)
600                         apply = item->mask;
601                 for (i = 0; i < size; ++i) {
602                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
603                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
604                 }
605                 ret = memcmp(spec, last, size);
606                 if (ret != 0) {
607                         rte_errno = EINVAL;
608                         return -rte_errno;
609                 }
610         }
611         return 0;
612 }
613
614 /**
615  * Validate flow rule attributes.
616  *
617  * @param[in] attr
618  *   Flow rule attributes.
619  * @param[out] error
620  *   Perform verbose error reporting if not NULL.
621  *
622  * @return
623  *   0 on success, a negative errno value otherwise and rte_errno is set.
624  */
625 static int
626 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
627                              struct rte_flow_error *error)
628 {
629         if (attr->group) {
630                 rte_flow_error_set(error, ENOTSUP,
631                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
632                                    NULL,
633                                    "groups are not supported");
634                 return -rte_errno;
635         }
636         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
637                 rte_flow_error_set(error, ENOTSUP,
638                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
639                                    NULL,
640                                    "priorities are not supported");
641                 return -rte_errno;
642         }
643         if (attr->egress) {
644                 rte_flow_error_set(error, ENOTSUP,
645                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
646                                    NULL,
647                                    "egress is not supported");
648                 return -rte_errno;
649         }
650         if (attr->transfer) {
651                 rte_flow_error_set(error, ENOTSUP,
652                                    RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
653                                    NULL,
654                                    "transfer is not supported");
655                 return -rte_errno;
656         }
657         if (!attr->ingress) {
658                 rte_flow_error_set(error, ENOTSUP,
659                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
660                                    NULL,
661                                    "only ingress is supported");
662                 return -rte_errno;
663         }
664         return 0;
665 }
666
667 /**
668  * Extract the requested actions into the parser.
669  *
670  * @param dev
671  *   Pointer to Ethernet device.
672  * @param[in] actions
673  *   Associated actions (list terminated by the END action).
674  * @param[out] error
675  *   Perform verbose error reporting if not NULL.
676  * @param[in, out] parser
677  *   Internal parser structure.
678  *
679  * @return
680  *   0 on success, a negative errno value otherwise and rte_errno is set.
681  */
682 static int
683 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
684                           const struct rte_flow_action actions[],
685                           struct rte_flow_error *error,
686                           struct mlx5_flow_parse *parser)
687 {
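        /*
         * Track the action categories already seen; at most one fate action
         * (drop/queue/RSS), one mark or flag and one count action are
         * accepted per flow.
         */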
688         enum { FATE = 1, MARK = 2, COUNT = 4, };
689         uint32_t overlap = 0;
690         struct priv *priv = dev->data->dev_private;
691
692         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
693                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
694                         continue;
695                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
696                         if (overlap & FATE)
697                                 goto exit_action_overlap;
698                         overlap |= FATE;
699                         parser->drop = 1;
700                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
701                         const struct rte_flow_action_queue *queue =
702                                 (const struct rte_flow_action_queue *)
703                                 actions->conf;
704
705                         if (overlap & FATE)
706                                 goto exit_action_overlap;
707                         overlap |= FATE;
708                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
709                                 goto exit_action_not_supported;
710                         parser->queues[0] = queue->index;
711                         parser->rss_conf = (struct rte_flow_action_rss){
712                                 .queue_num = 1,
713                                 .queue = parser->queues,
714                         };
715                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
716                         const struct rte_flow_action_rss *rss =
717                                 (const struct rte_flow_action_rss *)
718                                 actions->conf;
719                         const uint8_t *rss_key;
720                         uint32_t rss_key_len;
721                         uint16_t n;
722
723                         if (overlap & FATE)
724                                 goto exit_action_overlap;
725                         overlap |= FATE;
726                         if (rss->func &&
727                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
728                                 rte_flow_error_set(error, EINVAL,
729                                                    RTE_FLOW_ERROR_TYPE_ACTION,
730                                                    actions,
731                                                    "the only supported RSS hash"
732                                                    " function is Toeplitz");
733                                 return -rte_errno;
734                         }
735 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
736                         if (rss->level > 1) {
737                                 rte_flow_error_set(error, EINVAL,
738                                                    RTE_FLOW_ERROR_TYPE_ACTION,
739                                                    actions,
740                                                    "a nonzero RSS encapsulation"
741                                                    " level is not supported");
742                                 return -rte_errno;
743                         }
744 #endif
745                         if (rss->level > 2) {
746                                 rte_flow_error_set(error, EINVAL,
747                                                    RTE_FLOW_ERROR_TYPE_ACTION,
748                                                    actions,
749                                                    "RSS encapsulation level"
750                                                    " > 1 is not supported");
751                                 return -rte_errno;
752                         }
753                         if (rss->types & MLX5_RSS_HF_MASK) {
754                                 rte_flow_error_set(error, EINVAL,
755                                                    RTE_FLOW_ERROR_TYPE_ACTION,
756                                                    actions,
757                                                    "unsupported RSS type"
758                                                    " requested");
759                                 return -rte_errno;
760                         }
761                         if (rss->key_len) {
762                                 rss_key_len = rss->key_len;
763                                 rss_key = rss->key;
764                         } else {
765                                 rss_key_len = rss_hash_default_key_len;
766                                 rss_key = rss_hash_default_key;
767                         }
768                         if (rss_key_len != RTE_DIM(parser->rss_key)) {
769                                 rte_flow_error_set(error, EINVAL,
770                                                    RTE_FLOW_ERROR_TYPE_ACTION,
771                                                    actions,
772                                                    "RSS hash key must be"
773                                                    " exactly 40 bytes long");
774                                 return -rte_errno;
775                         }
776                         if (!rss->queue_num) {
777                                 rte_flow_error_set(error, EINVAL,
778                                                    RTE_FLOW_ERROR_TYPE_ACTION,
779                                                    actions,
780                                                    "no valid queues");
781                                 return -rte_errno;
782                         }
783                         if (rss->queue_num > RTE_DIM(parser->queues)) {
784                                 rte_flow_error_set(error, EINVAL,
785                                                    RTE_FLOW_ERROR_TYPE_ACTION,
786                                                    actions,
787                                                    "too many queues for RSS"
788                                                    " context");
789                                 return -rte_errno;
790                         }
791                         for (n = 0; n < rss->queue_num; ++n) {
792                                 if (rss->queue[n] >= priv->rxqs_n) {
793                                         rte_flow_error_set(error, EINVAL,
794                                                    RTE_FLOW_ERROR_TYPE_ACTION,
795                                                    actions,
796                                                    "queue id > number of"
797                                                    " queues");
798                                         return -rte_errno;
799                                 }
800                         }
801                         parser->rss_conf = (struct rte_flow_action_rss){
802                                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
803                                 .level = rss->level,
804                                 .types = rss->types,
805                                 .key_len = rss_key_len,
806                                 .queue_num = rss->queue_num,
807                                 .key = memcpy(parser->rss_key, rss_key,
808                                               sizeof(*rss_key) * rss_key_len),
809                                 .queue = memcpy(parser->queues, rss->queue,
810                                                 sizeof(*rss->queue) *
811                                                 rss->queue_num),
812                         };
813                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
814                         const struct rte_flow_action_mark *mark =
815                                 (const struct rte_flow_action_mark *)
816                                 actions->conf;
817
818                         if (overlap & MARK)
819                                 goto exit_action_overlap;
820                         overlap |= MARK;
821                         if (!mark) {
822                                 rte_flow_error_set(error, EINVAL,
823                                                    RTE_FLOW_ERROR_TYPE_ACTION,
824                                                    actions,
825                                                    "mark must be defined");
826                                 return -rte_errno;
827                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
828                                 rte_flow_error_set(error, ENOTSUP,
829                                                    RTE_FLOW_ERROR_TYPE_ACTION,
830                                                    actions,
831                                                    "mark must be between 0"
832                                                    " and 16777199");
833                                 return -rte_errno;
834                         }
835                         parser->mark = 1;
836                         parser->mark_id = mark->id;
837                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
838                         if (overlap & MARK)
839                                 goto exit_action_overlap;
840                         overlap |= MARK;
841                         parser->mark = 1;
842                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
843                            priv->config.flow_counter_en) {
844                         if (overlap & COUNT)
845                                 goto exit_action_overlap;
846                         overlap |= COUNT;
847                         parser->count = 1;
848                 } else {
849                         goto exit_action_not_supported;
850                 }
851         }
852         /* When fate is unknown, drop traffic. */
853         if (!(overlap & FATE))
854                 parser->drop = 1;
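        /*
         * A drop flow never delivers packets to a Rx queue where the mark
         * could be read, so ignore any mark request in that case.
         */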
855         if (parser->drop && parser->mark)
856                 parser->mark = 0;
857         if (!parser->rss_conf.queue_num && !parser->drop) {
858                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
859                                    NULL, "no valid action");
860                 return -rte_errno;
861         }
862         return 0;
863 exit_action_not_supported:
864         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
865                            actions, "action not supported");
866         return -rte_errno;
867 exit_action_overlap:
868         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
869                            actions, "overlapping actions are not supported");
870         return -rte_errno;
871 }
872
873 /**
874  * Validate pattern items and compute the size of the Verbs specifications.
875  *
 * @param dev
 *   Pointer to Ethernet device.
876  * @param[in] items
877  *   Pattern specification (list terminated by the END pattern item).
878  * @param[out] error
879  *   Perform verbose error reporting if not NULL.
880  * @param[in, out] parser
881  *   Internal parser structure.
882  *
883  * @return
884  *   0 on success, a negative errno value otherwise and rte_errno is set.
885  */
886 static int
887 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
888                                  const struct rte_flow_item items[],
889                                  struct rte_flow_error *error,
890                                  struct mlx5_flow_parse *parser)
891 {
892         struct priv *priv = dev->data->dev_private;
893         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
894         unsigned int i;
895         int ret = 0;
896
897         /* Initialise the offsets to start after verbs attribute. */
898         for (i = 0; i != hash_rxq_init_n; ++i)
899                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
900         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
901                 const struct mlx5_flow_items *token = NULL;
902                 unsigned int n;
903
904                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
905                         continue;
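                /*
                 * Check that this item is a valid successor of the previous
                 * item in the supported item graph.
                 */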
906                 for (i = 0;
907                      cur_item->items &&
908                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
909                      ++i) {
910                         if (cur_item->items[i] == items->type) {
911                                 token = &mlx5_flow_items[items->type];
912                                 break;
913                         }
914                 }
915                 if (!token) {
916                         ret = -ENOTSUP;
917                         goto exit_item_not_supported;
918                 }
919                 cur_item = token;
920                 ret = mlx5_flow_item_validate(items,
921                                               (const uint8_t *)cur_item->mask,
922                                               cur_item->mask_sz);
923                 if (ret)
924                         goto exit_item_not_supported;
925                 if (IS_TUNNEL(items->type)) {
926                         if (parser->tunnel) {
927                                 rte_flow_error_set(error, ENOTSUP,
928                                                    RTE_FLOW_ERROR_TYPE_ITEM,
929                                                    items,
930                                                    "Cannot recognize multiple"
931                                                    " tunnel encapsulations.");
932                                 return -rte_errno;
933                         }
934                         if (!priv->config.tunnel_en &&
935                             parser->rss_conf.level > 1) {
936                                 rte_flow_error_set(error, ENOTSUP,
937                                         RTE_FLOW_ERROR_TYPE_ITEM,
938                                         items,
939                                         "RSS on tunnel is not supported");
940                                 return -rte_errno;
941                         }
942                         parser->inner = IBV_FLOW_SPEC_INNER;
943                         parser->tunnel = flow_ptype[items->type];
944                 }
945                 if (parser->drop) {
946                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
947                 } else {
948                         for (n = 0; n != hash_rxq_init_n; ++n)
949                                 parser->queue[n].offset += cur_item->dst_sz;
950                 }
951         }
952         if (parser->drop) {
953                 parser->queue[HASH_RXQ_ETH].offset +=
954                         sizeof(struct ibv_flow_spec_action_drop);
955         }
956         if (parser->mark) {
957                 for (i = 0; i != hash_rxq_init_n; ++i)
958                         parser->queue[i].offset +=
959                                 sizeof(struct ibv_flow_spec_action_tag);
960         }
961         if (parser->count) {
962                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
963
964                 for (i = 0; i != hash_rxq_init_n; ++i)
965                         parser->queue[i].offset += size;
966         }
967         return 0;
968 exit_item_not_supported:
969         return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
970                                   items, "item not supported");
971 }
972
973 /**
974  * Allocate memory space to store verbs flow attributes.
975  *
976  * @param[in] size
977  *   Amount of byte to allocate.
978  * @param[out] error
979  *   Perform verbose error reporting if not NULL.
980  *
981  * @return
982  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
983  */
984 static struct ibv_flow_attr *
985 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
986 {
987         struct ibv_flow_attr *ibv_attr;
988
989         ibv_attr = rte_calloc(__func__, 1, size, 0);
990         if (!ibv_attr) {
991                 rte_flow_error_set(error, ENOMEM,
992                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
993                                    NULL,
994                                    "cannot allocate verbs spec attributes");
995                 return NULL;
996         }
997         return ibv_attr;
998 }
999
1000 /**
1001  * Give inner packet matching a higher priority than outer (non-inner)
1002  * matching.
1003  *
1004  * @param dev
1005  *   Pointer to Ethernet device.
1006  * @param[in, out] parser
1007  *   Internal parser structure.
1008  * @param attr
1009  *   User flow attribute.
1010  */
1011 static void
1012 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1013                           struct mlx5_flow_parse *parser,
1014                           const struct rte_flow_attr *attr)
1015 {
1016         struct priv *priv = dev->data->dev_private;
1017         unsigned int i;
1018         uint16_t priority;
1019
1020         /*                      8 priorities    >= 16 priorities
1021          * Control flow:        4-7             8-15
1022          * User normal flow:    1-3             4-7
1023          * User tunnel flow:    0-2             0-3
1024          */
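        /*
         * Example with 16 or more Verbs priorities: a non-tunnel user flow
         * with attr->priority == 0 maps to 0 * 8 + 4 = 4, then each hash Rx
         * queue type adds its own flow_priority (0-2) on top.
         */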
1025         priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1026         if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1027                 priority /= 2;
1028         /*
1029          * Lower non-tunnel flows by one Verbs priority level if only 8 Verbs
1030          * priorities are supported, by four levels otherwise.
1031          */
1032         if (!parser->inner) {
1033                 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1034                         priority += 1;
1035                 else
1036                         priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1037         }
1038         if (parser->drop) {
1039                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1040                                 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1041                 return;
1042         }
1043         for (i = 0; i != hash_rxq_init_n; ++i) {
1044                 if (!parser->queue[i].ibv_attr)
1045                         continue;
1046                 parser->queue[i].ibv_attr->priority = priority +
1047                                 hash_rxq_init[i].flow_priority;
1048         }
1049 }
1050
1051 /**
1052  * Finalise verbs flow attributes.
1053  *
1054  * @param[in, out] parser
1055  *   Internal parser structure.
1056  */
1057 static void
1058 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1059 {
1060         unsigned int i;
1061         uint32_t inner = parser->inner;
1062
1063         /* Don't create extra flows for outer RSS. */
1064         if (parser->tunnel && parser->rss_conf.level < 2)
1065                 return;
1066         /*
1067          * Fill missing layers in verbs specifications, or compute the correct
1068          * offset to allocate the memory space for the attributes and
1069          * specifications.
1070          */
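        /*
         * HASH_RXQ_ETH, the last entry, carries no L3/L4 layers to complete,
         * hence the hash_rxq_init_n - 1 bound.
         */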
1071         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1072                 union {
1073                         struct ibv_flow_spec_ipv4_ext ipv4;
1074                         struct ibv_flow_spec_ipv6 ipv6;
1075                         struct ibv_flow_spec_tcp_udp udp_tcp;
1076                         struct ibv_flow_spec_eth eth;
1077                 } specs;
1078                 void *dst;
1079                 uint16_t size;
1080
1081                 if (i == parser->layer)
1082                         continue;
1083                 if (parser->layer == HASH_RXQ_ETH ||
1084                     parser->layer == HASH_RXQ_TUNNEL) {
1085                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1086                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1087                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1088                                         .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1089                                         .size = size,
1090                                 };
1091                         } else {
1092                                 size = sizeof(struct ibv_flow_spec_ipv6);
1093                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1094                                         .type = inner | IBV_FLOW_SPEC_IPV6,
1095                                         .size = size,
1096                                 };
1097                         }
1098                         if (parser->queue[i].ibv_attr) {
1099                                 dst = (void *)((uintptr_t)
1100                                                parser->queue[i].ibv_attr +
1101                                                parser->queue[i].offset);
1102                                 memcpy(dst, &specs, size);
1103                                 ++parser->queue[i].ibv_attr->num_of_specs;
1104                         }
1105                         parser->queue[i].offset += size;
1106                 }
1107                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1108                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1109                         size = sizeof(struct ibv_flow_spec_tcp_udp);
1110                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1111                                 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1112                                           i == HASH_RXQ_UDPV6) ?
1113                                          IBV_FLOW_SPEC_UDP :
1114                                          IBV_FLOW_SPEC_TCP),
1115                                 .size = size,
1116                         };
1117                         if (parser->queue[i].ibv_attr) {
1118                                 dst = (void *)((uintptr_t)
1119                                                parser->queue[i].ibv_attr +
1120                                                parser->queue[i].offset);
1121                                 memcpy(dst, &specs, size);
1122                                 ++parser->queue[i].ibv_attr->num_of_specs;
1123                         }
1124                         parser->queue[i].offset += size;
1125                 }
1126         }
1127 }
1128
1129 /**
1130  * Update flows according to pattern and RSS hash fields.
1131  *
1132  * @param[in, out] parser
1133  *   Internal parser structure.
1134  *
1135  * @return
1136  *   0 on success, a negative errno value otherwise and rte_errno is set.
1137  */
1138 static int
1139 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1140 {
1141         unsigned int i;
1142         enum hash_rxq_type start;
1143         enum hash_rxq_type layer;
1144         int outer = parser->tunnel && parser->rss_conf.level < 2;
1145         uint64_t rss = parser->rss_conf.types;
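        /*
         * "Outer" RSS: the pattern contains a tunnel item but hashing was
         * requested on the outer headers only (level 0 or 1).
         */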
1146
1147         /* Default to outer RSS. */
1148         if (!parser->rss_conf.level)
1149                 parser->rss_conf.level = 1;
1150         layer = outer ? parser->out_layer : parser->layer;
1151         if (layer == HASH_RXQ_TUNNEL)
1152                 layer = HASH_RXQ_ETH;
1153         if (outer) {
1154                 /* Only one hash type for outer RSS. */
1155                 if (rss && layer == HASH_RXQ_ETH) {
1156                         start = HASH_RXQ_TCPV4;
1157                 } else if (rss && layer != HASH_RXQ_ETH &&
1158                            !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1159                         /* If the RSS types do not match the L4 pattern, try L3 RSS. */
1160                         if (layer < HASH_RXQ_IPV4)
1161                                 layer = HASH_RXQ_IPV4;
1162                         else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1163                                 layer = HASH_RXQ_IPV6;
1164                         start = layer;
1165                 } else {
1166                         start = layer;
1167                 }
1168                 /* Scan first valid hash type. */
1169                 for (i = start; rss && i <= layer; ++i) {
1170                         if (!parser->queue[i].ibv_attr)
1171                                 continue;
1172                         if (hash_rxq_init[i].dpdk_rss_hf & rss)
1173                                 break;
1174                 }
1175                 if (rss && i <= layer)
1176                         parser->queue[layer].hash_fields =
1177                                         hash_rxq_init[i].hash_fields;
1178                 /* Trim unused hash types. */
1179                 for (i = 0; i != hash_rxq_init_n; ++i) {
1180                         if (parser->queue[i].ibv_attr && i != layer) {
1181                                 rte_free(parser->queue[i].ibv_attr);
1182                                 parser->queue[i].ibv_attr = NULL;
1183                         }
1184                 }
1185         } else {
1186                 /* Expand for inner or normal RSS. */
1187                 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1188                         start = HASH_RXQ_TCPV4;
1189                 else if (rss && layer == HASH_RXQ_IPV6)
1190                         start = HASH_RXQ_TCPV6;
1191                 else
1192                         start = layer;
1193                 /* For L4 pattern, try L3 RSS if no L4 RSS. */
1194                 /* Trim unused hash types. */
1195                 for (i = 0; i != hash_rxq_init_n; ++i) {
1196                         if (!parser->queue[i].ibv_attr)
1197                                 continue;
1198                         if (i < start || i > layer) {
1199                                 rte_free(parser->queue[i].ibv_attr);
1200                                 parser->queue[i].ibv_attr = NULL;
1201                                 continue;
1202                         }
1203                         if (!rss)
1204                                 continue;
1205                         if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1206                                 parser->queue[i].hash_fields =
1207                                                 hash_rxq_init[i].hash_fields;
1208                         } else if (i != layer) {
1209                                 /* Remove unused RSS expansion. */
1210                                 rte_free(parser->queue[i].ibv_attr);
1211                                 parser->queue[i].ibv_attr = NULL;
1212                         } else if (layer < HASH_RXQ_IPV4 &&
1213                                    (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1214                                     rss)) {
1215                                 /* Allow IPv4 RSS on L4 pattern. */
1216                                 parser->queue[i].hash_fields =
1217                                         hash_rxq_init[HASH_RXQ_IPV4]
1218                                                 .hash_fields;
1219                         } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1220                                    (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1221                                     rss)) {
1222                                 /* Allow IPv6 RSS on L4 pattern. */
1223                                 parser->queue[i].hash_fields =
1224                                         hash_rxq_init[HASH_RXQ_IPV6]
1225                                                 .hash_fields;
1226                         }
1227                 }
1228         }
1229         return 0;
1230 }
1231
1232 /**
1233  * Validate and convert a flow supported by the NIC.
1234  *
1235  * @param dev
1236  *   Pointer to Ethernet device.
1237  * @param[in] attr
1238  *   Flow rule attributes.
1239  * @param[in] pattern
1240  *   Pattern specification (list terminated by the END pattern item).
1241  * @param[in] actions
1242  *   Associated actions (list terminated by the END action).
1243  * @param[out] error
1244  *   Perform verbose error reporting if not NULL.
1245  * @param[in, out] parser
1246  *   Internal parser structure.
1247  *
1248  * @return
1249  *   0 on success, a negative errno value otherwise and rte_errno is set.
1250  */
1251 static int
1252 mlx5_flow_convert(struct rte_eth_dev *dev,
1253                   const struct rte_flow_attr *attr,
1254                   const struct rte_flow_item items[],
1255                   const struct rte_flow_action actions[],
1256                   struct rte_flow_error *error,
1257                   struct mlx5_flow_parse *parser)
1258 {
1259         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1260         unsigned int i;
1261         int ret;
1262
1263         /* First step. Validate the attributes, items and actions. */
1264         *parser = (struct mlx5_flow_parse){
1265                 .create = parser->create,
1266                 .layer = HASH_RXQ_ETH,
1267                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1268         };
1269         ret = mlx5_flow_convert_attributes(attr, error);
1270         if (ret)
1271                 return ret;
1272         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1273         if (ret)
1274                 return ret;
1275         ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1276         if (ret)
1277                 return ret;
1278         mlx5_flow_convert_finalise(parser);
1279         /*
1280          * Second step.
1281          * Allocate the memory space to store verbs specifications.
1282          */
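        /*
         * A drop rule only needs a single specification attached to the drop
         * queue, so only the HASH_RXQ_ETH attribute is allocated; otherwise
         * one attribute is allocated per hash Rx queue type so the rule can
         * later be attached to every relevant RSS hash queue.
         */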
1283         if (parser->drop) {
1284                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1285
1286                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1287                         mlx5_flow_convert_allocate(offset, error);
1288                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1289                         goto exit_enomem;
1290                 parser->queue[HASH_RXQ_ETH].offset =
1291                         sizeof(struct ibv_flow_attr);
1292         } else {
1293                 for (i = 0; i != hash_rxq_init_n; ++i) {
1294                         unsigned int offset;
1295
1296                         offset = parser->queue[i].offset;
1297                         parser->queue[i].ibv_attr =
1298                                 mlx5_flow_convert_allocate(offset, error);
1299                         if (!parser->queue[i].ibv_attr)
1300                                 goto exit_enomem;
1301                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1302                 }
1303         }
1304         /* Third step. Conversion parse, fill the specifications. */
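        /*
         * Each item conversion callback below appends its Verbs
         * specification, through mlx5_flow_create_copy(), to every attribute
         * allocated above, using the item mask when provided or the default
         * mask otherwise.
         */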
1305         parser->inner = 0;
1306         parser->tunnel = 0;
1307         parser->layer = HASH_RXQ_ETH;
1308         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1309                 struct mlx5_flow_data data = {
1310                         .dev = dev,
1311                         .parser = parser,
1312                         .error = error,
1313                 };
1314
1315                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1316                         continue;
1317                 cur_item = &mlx5_flow_items[items->type];
1318                 ret = cur_item->convert(items,
1319                                         (cur_item->default_mask ?
1320                                          cur_item->default_mask :
1321                                          cur_item->mask),
1322                                          &data);
1323                 if (ret)
1324                         goto exit_free;
1325         }
1326         if (!parser->drop) {
1327                 /* RSS check, remove unused hash types. */
1328                 ret = mlx5_flow_convert_rss(parser);
1329                 if (ret)
1330                         goto exit_free;
1331                 /* Complete missing specification. */
1332                 mlx5_flow_convert_finalise(parser);
1333         }
1334         mlx5_flow_update_priority(dev, parser, attr);
1335         if (parser->mark)
1336                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1337         if (parser->count && parser->create) {
1338                 mlx5_flow_create_count(dev, parser);
1339                 if (!parser->cs)
1340                         goto exit_count_error;
1341         }
1342 exit_free:
1343         /* Only verification is expected, all resources should be released. */
1344         if (!parser->create) {
1345                 for (i = 0; i != hash_rxq_init_n; ++i) {
1346                         if (parser->queue[i].ibv_attr) {
1347                                 rte_free(parser->queue[i].ibv_attr);
1348                                 parser->queue[i].ibv_attr = NULL;
1349                         }
1350                 }
1351         }
1352         return ret;
1353 exit_enomem:
1354         for (i = 0; i != hash_rxq_init_n; ++i) {
1355                 if (parser->queue[i].ibv_attr) {
1356                         rte_free(parser->queue[i].ibv_attr);
1357                         parser->queue[i].ibv_attr = NULL;
1358                 }
1359         }
1360         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1361                            NULL, "cannot allocate verbs spec attributes");
1362         return -rte_errno;
1363 exit_count_error:
1364         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1365                            NULL, "cannot create counter");
1366         return -rte_errno;
1367 }
1368
1369 /**
1370  * Copy the specification created into the flow.
1371  *
1372  * @param parser
1373  *   Internal parser structure.
1374  * @param src
1375  *   Create specification.
1376  * @param size
1377  *   Size in bytes of the specification to copy.
1378  */
1379 static void
1380 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1381                       unsigned int size)
1382 {
1383         unsigned int i;
1384         void *dst;
1385
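        /*
         * Duplicate the same specification into every hash Rx queue
         * attribute still allocated; the per-queue offset tracks where the
         * next specification must be written.
         */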
1386         for (i = 0; i != hash_rxq_init_n; ++i) {
1387                 if (!parser->queue[i].ibv_attr)
1388                         continue;
1389                 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1390                                 parser->queue[i].offset);
1391                 memcpy(dst, src, size);
1392                 ++parser->queue[i].ibv_attr->num_of_specs;
1393                 parser->queue[i].offset += size;
1394         }
1395 }
1396
1397 /**
1398  * Convert Ethernet item to Verbs specification.
1399  *
1400  * @param item[in]
1401  *   Item specification.
1402  * @param default_mask[in]
1403  *   Default bit-masks to use when item->mask is not provided.
1404  * @param data[in, out]
1405  *   User structure.
1406  *
1407  * @return
1408  *   0 on success, a negative errno value otherwise and rte_errno is set.
1409  */
1410 static int
1411 mlx5_flow_create_eth(const struct rte_flow_item *item,
1412                      const void *default_mask,
1413                      struct mlx5_flow_data *data)
1414 {
1415         const struct rte_flow_item_eth *spec = item->spec;
1416         const struct rte_flow_item_eth *mask = item->mask;
1417         struct mlx5_flow_parse *parser = data->parser;
1418         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1419         struct ibv_flow_spec_eth eth = {
1420                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1421                 .size = eth_size,
1422         };
1423
1424         parser->layer = HASH_RXQ_ETH;
1425         if (spec) {
1426                 unsigned int i;
1427
1428                 if (!mask)
1429                         mask = default_mask;
1430                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1431                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1432                 eth.val.ether_type = spec->type;
1433                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1434                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1435                 eth.mask.ether_type = mask->type;
1436                 /* Remove unwanted bits from values. */
1437                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1438                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1439                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1440                 }
1441                 eth.val.ether_type &= eth.mask.ether_type;
1442         }
1443         mlx5_flow_create_copy(parser, &eth, eth_size);
1444         return 0;
1445 }
1446
1447 /**
1448  * Convert VLAN item to Verbs specification.
1449  *
1450  * @param item[in]
1451  *   Item specification.
1452  * @param default_mask[in]
1453  *   Default bit-masks to use when item->mask is not provided.
1454  * @param data[in, out]
1455  *   User structure.
1456  *
1457  * @return
1458  *   0 on success, a negative errno value otherwise and rte_errno is set.
1459  */
1460 static int
1461 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1462                       const void *default_mask,
1463                       struct mlx5_flow_data *data)
1464 {
1465         const struct rte_flow_item_vlan *spec = item->spec;
1466         const struct rte_flow_item_vlan *mask = item->mask;
1467         struct mlx5_flow_parse *parser = data->parser;
1468         struct ibv_flow_spec_eth *eth;
1469         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1470         const char *msg = "VLAN cannot be empty";
1471
1472         if (spec) {
1473                 unsigned int i;
1474                 if (!mask)
1475                         mask = default_mask;
1476
1477                 for (i = 0; i != hash_rxq_init_n; ++i) {
1478                         if (!parser->queue[i].ibv_attr)
1479                                 continue;
1480
1481                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1482                                        parser->queue[i].offset - eth_size);
1483                         eth->val.vlan_tag = spec->tci;
1484                         eth->mask.vlan_tag = mask->tci;
1485                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1486                         /*
1487                          * From the Verbs perspective, an empty VLAN is
1488                          * equivalent to a packet without a VLAN layer.
1489                          */
1490                         if (!eth->mask.vlan_tag)
1491                                 goto error;
1492                         /* Outer TPID cannot be matched. */
1493                         if (eth->mask.ether_type) {
1494                                 msg = "VLAN TPID matching is not supported";
1495                                 goto error;
1496                         }
1497                         eth->val.ether_type = spec->inner_type;
1498                         eth->mask.ether_type = mask->inner_type;
1499                         eth->val.ether_type &= eth->mask.ether_type;
1500                 }
1501                 return 0;
1502         }
1503 error:
1504         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1505                                   item, msg);
1506 }
1507
1508 /**
1509  * Convert IPv4 item to Verbs specification.
1510  *
1511  * @param item[in]
1512  *   Item specification.
1513  * @param default_mask[in]
1514  *   Default bit-masks to use when item->mask is not provided.
1515  * @param data[in, out]
1516  *   User structure.
1517  *
1518  * @return
1519  *   0 on success, a negative errno value otherwise and rte_errno is set.
1520  */
1521 static int
1522 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1523                       const void *default_mask,
1524                       struct mlx5_flow_data *data)
1525 {
1526         struct priv *priv = data->dev->data->dev_private;
1527         const struct rte_flow_item_ipv4 *spec = item->spec;
1528         const struct rte_flow_item_ipv4 *mask = item->mask;
1529         struct mlx5_flow_parse *parser = data->parser;
1530         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1531         struct ibv_flow_spec_ipv4_ext ipv4 = {
1532                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1533                 .size = ipv4_size,
1534         };
1535
1536         if (parser->layer == HASH_RXQ_TUNNEL &&
1537             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1538             !priv->config.l3_vxlan_en)
1539                 return rte_flow_error_set(data->error, EINVAL,
1540                                           RTE_FLOW_ERROR_TYPE_ITEM,
1541                                           item,
1542                                           "L3 VXLAN not enabled by device"
1543                                           " parameter and/or not configured"
1544                                           " in firmware");
1545         parser->layer = HASH_RXQ_IPV4;
1546         if (spec) {
1547                 if (!mask)
1548                         mask = default_mask;
1549                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1550                         .src_ip = spec->hdr.src_addr,
1551                         .dst_ip = spec->hdr.dst_addr,
1552                         .proto = spec->hdr.next_proto_id,
1553                         .tos = spec->hdr.type_of_service,
1554                 };
1555                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1556                         .src_ip = mask->hdr.src_addr,
1557                         .dst_ip = mask->hdr.dst_addr,
1558                         .proto = mask->hdr.next_proto_id,
1559                         .tos = mask->hdr.type_of_service,
1560                 };
1561                 /* Remove unwanted bits from values. */
1562                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1563                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1564                 ipv4.val.proto &= ipv4.mask.proto;
1565                 ipv4.val.tos &= ipv4.mask.tos;
1566         }
1567         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1568         return 0;
1569 }
1570
1571 /**
1572  * Convert IPv6 item to Verbs specification.
1573  *
1574  * @param item[in]
1575  *   Item specification.
1576  * @param default_mask[in]
1577  *   Default bit-masks to use when item->mask is not provided.
1578  * @param data[in, out]
1579  *   User structure.
1580  *
1581  * @return
1582  *   0 on success, a negative errno value otherwise and rte_errno is set.
1583  */
1584 static int
1585 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1586                       const void *default_mask,
1587                       struct mlx5_flow_data *data)
1588 {
1589         struct priv *priv = data->dev->data->dev_private;
1590         const struct rte_flow_item_ipv6 *spec = item->spec;
1591         const struct rte_flow_item_ipv6 *mask = item->mask;
1592         struct mlx5_flow_parse *parser = data->parser;
1593         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1594         struct ibv_flow_spec_ipv6 ipv6 = {
1595                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1596                 .size = ipv6_size,
1597         };
1598
1599         if (parser->layer == HASH_RXQ_TUNNEL &&
1600             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1601             !priv->config.l3_vxlan_en)
1602                 return rte_flow_error_set(data->error, EINVAL,
1603                                           RTE_FLOW_ERROR_TYPE_ITEM,
1604                                           item,
1605                                           "L3 VXLAN not enabled by device"
1606                                           " parameter and/or not configured"
1607                                           " in firmware");
1608         parser->layer = HASH_RXQ_IPV6;
1609         if (spec) {
1610                 unsigned int i;
1611                 uint32_t vtc_flow_val;
1612                 uint32_t vtc_flow_mask;
1613
1614                 if (!mask)
1615                         mask = default_mask;
1616                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1617                        RTE_DIM(ipv6.val.src_ip));
1618                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1619                        RTE_DIM(ipv6.val.dst_ip));
1620                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1621                        RTE_DIM(ipv6.mask.src_ip));
1622                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1623                        RTE_DIM(ipv6.mask.dst_ip));
1624                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1625                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1626                 ipv6.val.flow_label =
1627                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1628                                          IPV6_HDR_FL_SHIFT);
1629                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1630                                          IPV6_HDR_TC_SHIFT;
1631                 ipv6.val.next_hdr = spec->hdr.proto;
1632                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1633                 ipv6.mask.flow_label =
1634                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1635                                          IPV6_HDR_FL_SHIFT);
1636                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1637                                           IPV6_HDR_TC_SHIFT;
1638                 ipv6.mask.next_hdr = mask->hdr.proto;
1639                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1640                 /* Remove unwanted bits from values. */
1641                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1642                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1643                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1644                 }
1645                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1646                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1647                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1648                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1649         }
1650         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1651         return 0;
1652 }
1653
1654 /**
1655  * Convert UDP item to Verbs specification.
1656  *
1657  * @param item[in]
1658  *   Item specification.
1659  * @param default_mask[in]
1660  *   Default bit-masks to use when item->mask is not provided.
1661  * @param data[in, out]
1662  *   User structure.
1663  *
1664  * @return
1665  *   0 on success, a negative errno value otherwise and rte_errno is set.
1666  */
1667 static int
1668 mlx5_flow_create_udp(const struct rte_flow_item *item,
1669                      const void *default_mask,
1670                      struct mlx5_flow_data *data)
1671 {
1672         const struct rte_flow_item_udp *spec = item->spec;
1673         const struct rte_flow_item_udp *mask = item->mask;
1674         struct mlx5_flow_parse *parser = data->parser;
1675         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1676         struct ibv_flow_spec_tcp_udp udp = {
1677                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1678                 .size = udp_size,
1679         };
1680
1681         if (parser->layer == HASH_RXQ_IPV4)
1682                 parser->layer = HASH_RXQ_UDPV4;
1683         else
1684                 parser->layer = HASH_RXQ_UDPV6;
1685         if (spec) {
1686                 if (!mask)
1687                         mask = default_mask;
1688                 udp.val.dst_port = spec->hdr.dst_port;
1689                 udp.val.src_port = spec->hdr.src_port;
1690                 udp.mask.dst_port = mask->hdr.dst_port;
1691                 udp.mask.src_port = mask->hdr.src_port;
1692                 /* Remove unwanted bits from values. */
1693                 udp.val.src_port &= udp.mask.src_port;
1694                 udp.val.dst_port &= udp.mask.dst_port;
1695         }
1696         mlx5_flow_create_copy(parser, &udp, udp_size);
1697         return 0;
1698 }
1699
1700 /**
1701  * Convert TCP item to Verbs specification.
1702  *
1703  * @param item[in]
1704  *   Item specification.
1705  * @param default_mask[in]
1706  *   Default bit-masks to use when item->mask is not provided.
1707  * @param data[in, out]
1708  *   User structure.
1709  *
1710  * @return
1711  *   0 on success, a negative errno value otherwise and rte_errno is set.
1712  */
1713 static int
1714 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1715                      const void *default_mask,
1716                      struct mlx5_flow_data *data)
1717 {
1718         const struct rte_flow_item_tcp *spec = item->spec;
1719         const struct rte_flow_item_tcp *mask = item->mask;
1720         struct mlx5_flow_parse *parser = data->parser;
1721         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1722         struct ibv_flow_spec_tcp_udp tcp = {
1723                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1724                 .size = tcp_size,
1725         };
1726
1727         if (parser->layer == HASH_RXQ_IPV4)
1728                 parser->layer = HASH_RXQ_TCPV4;
1729         else
1730                 parser->layer = HASH_RXQ_TCPV6;
1731         if (spec) {
1732                 if (!mask)
1733                         mask = default_mask;
1734                 tcp.val.dst_port = spec->hdr.dst_port;
1735                 tcp.val.src_port = spec->hdr.src_port;
1736                 tcp.mask.dst_port = mask->hdr.dst_port;
1737                 tcp.mask.src_port = mask->hdr.src_port;
1738                 /* Remove unwanted bits from values. */
1739                 tcp.val.src_port &= tcp.mask.src_port;
1740                 tcp.val.dst_port &= tcp.mask.dst_port;
1741         }
1742         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1743         return 0;
1744 }
1745
1746 /**
1747  * Convert VXLAN item to Verbs specification.
1748  *
1749  * @param item[in]
1750  *   Item specification.
1751  * @param default_mask[in]
1752  *   Default bit-masks to use when item->mask is not provided.
1753  * @param data[in, out]
1754  *   User structure.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 static int
1760 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1761                        const void *default_mask,
1762                        struct mlx5_flow_data *data)
1763 {
1764         const struct rte_flow_item_vxlan *spec = item->spec;
1765         const struct rte_flow_item_vxlan *mask = item->mask;
1766         struct mlx5_flow_parse *parser = data->parser;
1767         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1768         struct ibv_flow_spec_tunnel vxlan = {
1769                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1770                 .size = size,
1771         };
1772         union vni {
1773                 uint32_t vlan_id;
1774                 uint8_t vni[4];
1775         } id;
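        /*
         * The 24-bit VNI is copied into bytes 1-3 of the union and read back
         * as the 32-bit tunnel_id expected by Verbs, byte 0 being cleared
         * beforehand.
         */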
1776
1777         id.vni[0] = 0;
1778         parser->inner = IBV_FLOW_SPEC_INNER;
1779         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1780         parser->out_layer = parser->layer;
1781         parser->layer = HASH_RXQ_TUNNEL;
1782         /* Default VXLAN to outer RSS. */
1783         if (!parser->rss_conf.level)
1784                 parser->rss_conf.level = 1;
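        /*
         * rss_conf.level follows rte_flow semantics: 0 means unspecified,
         * 1 requests hashing on the outermost headers and 2 and above on the
         * inner ones, hence VXLAN defaults to outer RSS when no level was
         * given.
         */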
1785         if (spec) {
1786                 if (!mask)
1787                         mask = default_mask;
1788                 memcpy(&id.vni[1], spec->vni, 3);
1789                 vxlan.val.tunnel_id = id.vlan_id;
1790                 memcpy(&id.vni[1], mask->vni, 3);
1791                 vxlan.mask.tunnel_id = id.vlan_id;
1792                 /* Remove unwanted bits from values. */
1793                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1794         }
1795         /*
1796          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1797          * this layer is defined in the Verbs specification, it is
1798          * interpreted as a wildcard and all packets will match this rule;
1799          * if it follows a full stack layer (e.g. eth / ipv4 / udp), all
1800          * packets matching the layers before will also match this rule.
1801          * To avoid such a situation, VNI 0 is refused unless the tunnel
1802          * item follows a proper outer specification.
1803          */
1804         if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1805                 return rte_flow_error_set(data->error, EINVAL,
1806                                           RTE_FLOW_ERROR_TYPE_ITEM,
1807                                           item,
1808                                           "VxLAN vni cannot be 0");
1809         mlx5_flow_create_copy(parser, &vxlan, size);
1810         return 0;
1811 }
1812
1813 /**
1814  * Convert VXLAN-GPE item to Verbs specification.
1815  *
1816  * @param item[in]
1817  *   Item specification.
1818  * @param default_mask[in]
1819  *   Default bit-masks to use when item->mask is not provided.
1820  * @param data[in, out]
1821  *   User structure.
1822  *
1823  * @return
1824  *   0 on success, a negative errno value otherwise and rte_errno is set.
1825  */
1826 static int
1827 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1828                            const void *default_mask,
1829                            struct mlx5_flow_data *data)
1830 {
1831         struct priv *priv = data->dev->data->dev_private;
1832         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1833         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1834         struct mlx5_flow_parse *parser = data->parser;
1835         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1836         struct ibv_flow_spec_tunnel vxlan = {
1837                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1838                 .size = size,
1839         };
1840         union vni {
1841                 uint32_t vlan_id;
1842                 uint8_t vni[4];
1843         } id;
1844
1845         if (!priv->config.l3_vxlan_en)
1846                 return rte_flow_error_set(data->error, EINVAL,
1847                                           RTE_FLOW_ERROR_TYPE_ITEM,
1848                                           item,
1849                                           "L3 VXLAN not enabled by device"
1850                                           " parameter and/or not configured"
1851                                           " in firmware");
1852         id.vni[0] = 0;
1853         parser->inner = IBV_FLOW_SPEC_INNER;
1854         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1855         parser->out_layer = parser->layer;
1856         parser->layer = HASH_RXQ_TUNNEL;
1857         /* Default VXLAN-GPE to outer RSS. */
1858         if (!parser->rss_conf.level)
1859                 parser->rss_conf.level = 1;
1860         if (spec) {
1861                 if (!mask)
1862                         mask = default_mask;
1863                 memcpy(&id.vni[1], spec->vni, 3);
1864                 vxlan.val.tunnel_id = id.vlan_id;
1865                 memcpy(&id.vni[1], mask->vni, 3);
1866                 vxlan.mask.tunnel_id = id.vlan_id;
1867                 if (spec->protocol)
1868                         return rte_flow_error_set(data->error, EINVAL,
1869                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1870                                                   item,
1871                                                   "VxLAN-GPE protocol not"
1872                                                   " supported");
1873                 /* Remove unwanted bits from values. */
1874                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1875         }
1876         /*
1877          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1878          * this layer is defined in the Verbs specification, it is
1879          * interpreted as a wildcard and all packets will match this rule;
1880          * if it follows a full stack layer (e.g. eth / ipv4 / udp), all
1881          * packets matching the layers before will also match this rule.
1882          * To avoid such a situation, VNI 0 is refused unless the tunnel
1883          * item follows a proper outer specification.
1884          */
1885         if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1886                 return rte_flow_error_set(data->error, EINVAL,
1887                                           RTE_FLOW_ERROR_TYPE_ITEM,
1888                                           item,
1889                                           "VxLAN-GPE vni cannot be 0");
1890         mlx5_flow_create_copy(parser, &vxlan, size);
1891         return 0;
1892 }
1893
1894 /**
1895  * Convert GRE item to Verbs specification.
1896  *
1897  * @param item[in]
1898  *   Item specification.
1899  * @param default_mask[in]
1900  *   Default bit-masks to use when item->mask is not provided.
1901  * @param data[in, out]
1902  *   User structure.
1903  *
1904  * @return
1905  *   0 on success, a negative errno value otherwise and rte_errno is set.
1906  */
1907 static int
1908 mlx5_flow_create_gre(const struct rte_flow_item *item,
1909                      const void *default_mask __rte_unused,
1910                      struct mlx5_flow_data *data)
1911 {
1912         struct mlx5_flow_parse *parser = data->parser;
1913         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1914         struct ibv_flow_spec_tunnel tunnel = {
1915                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1916                 .size = size,
1917         };
1918         struct ibv_flow_spec_ipv4_ext *ipv4;
1919         struct ibv_flow_spec_ipv6 *ipv6;
1920         unsigned int i;
1921
1922         parser->inner = IBV_FLOW_SPEC_INNER;
1923         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1924         parser->out_layer = parser->layer;
1925         parser->layer = HASH_RXQ_TUNNEL;
1926         /* Default GRE to inner RSS. */
1927         if (!parser->rss_conf.level)
1928                 parser->rss_conf.level = 2;
1929         /* Update encapsulation IP layer protocol. */
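        /*
         * The outer IPv4/IPv6 specification written just before this item is
         * patched in place: its protocol/next header must either be unset or
         * already be GRE (47), otherwise the loop breaks early and the rule
         * is rejected below.
         */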
1930         for (i = 0; i != hash_rxq_init_n; ++i) {
1931                 if (!parser->queue[i].ibv_attr)
1932                         continue;
1933                 if (parser->out_layer == HASH_RXQ_IPV4) {
1934                         ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1935                                 parser->queue[i].offset -
1936                                 sizeof(struct ibv_flow_spec_ipv4_ext));
1937                         if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1938                                 break;
1939                         ipv4->val.proto = MLX5_GRE;
1940                         ipv4->mask.proto = 0xff;
1941                 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1942                         ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1943                                 parser->queue[i].offset -
1944                                 sizeof(struct ibv_flow_spec_ipv6));
1945                         if (ipv6->mask.next_hdr &&
1946                             ipv6->val.next_hdr != MLX5_GRE)
1947                                 break;
1948                         ipv6->val.next_hdr = MLX5_GRE;
1949                         ipv6->mask.next_hdr = 0xff;
1950                 }
1951         }
1952         if (i != hash_rxq_init_n)
1953                 return rte_flow_error_set(data->error, EINVAL,
1954                                           RTE_FLOW_ERROR_TYPE_ITEM,
1955                                           item,
1956                                           "IP protocol of GRE must be 47");
1957         mlx5_flow_create_copy(parser, &tunnel, size);
1958         return 0;
1959 }
1960
1961 /**
1962  * Convert mark/flag action to Verbs specification.
1963  *
1964  * @param parser
1965  *   Internal parser structure.
1966  * @param mark_id
1967  *   Mark identifier.
1968  *
1969  * @return
1970  *   0 on success, a negative errno value otherwise and rte_errno is set.
1971  */
1972 static int
1973 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1974 {
1975         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1976         struct ibv_flow_spec_action_tag tag = {
1977                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1978                 .size = size,
1979                 .tag_id = mlx5_flow_mark_set(mark_id),
1980         };
1981
1982         assert(parser->mark);
1983         mlx5_flow_create_copy(parser, &tag, size);
1984         return 0;
1985 }
1986
1987 /**
1988  * Convert count action to Verbs specification.
1989  *
1990  * @param dev
1991  *   Pointer to Ethernet device.
1992  * @param parser
1993  *   Pointer to MLX5 flow parser structure.
1994  *
1995  * @return
1996  *   0 on success, a negative errno value otherwise and rte_errno is set.
1997  */
1998 static int
1999 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2000                        struct mlx5_flow_parse *parser __rte_unused)
2001 {
2002 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2003         struct priv *priv = dev->data->dev_private;
2004         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2005         struct ibv_counter_set_init_attr init_attr = {0};
2006         struct ibv_flow_spec_counter_action counter = {
2007                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2008                 .size = size,
2009                 .counter_set_handle = 0,
2010         };
2011
2012         init_attr.counter_set_id = 0;
2013         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2014         if (!parser->cs) {
2015                 rte_errno = EINVAL;
2016                 return -rte_errno;
2017         }
2018         counter.counter_set_handle = parser->cs->handle;
2019         mlx5_flow_create_copy(parser, &counter, size);
2020 #endif
2021         return 0;
2022 }
2023
2024 /**
2025  * Complete flow rule creation with a drop queue.
2026  *
2027  * @param dev
2028  *   Pointer to Ethernet device.
2029  * @param parser
2030  *   Internal parser structure.
2031  * @param flow
2032  *   Pointer to the rte_flow.
2033  * @param[out] error
2034  *   Perform verbose error reporting if not NULL.
2035  *
2036  * @return
2037  *   0 on success, a negative errno value otherwise and rte_errno is set.
2038  */
2039 static int
2040 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2041                                    struct mlx5_flow_parse *parser,
2042                                    struct rte_flow *flow,
2043                                    struct rte_flow_error *error)
2044 {
2045         struct priv *priv = dev->data->dev_private;
2046         struct ibv_flow_spec_action_drop *drop;
2047         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2048
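        /*
         * Append a drop action specification to the single HASH_RXQ_ETH
         * attribute; when the port is already started the rule is installed
         * immediately on the dedicated drop queue QP.
         */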
2049         assert(priv->pd);
2050         assert(priv->ctx);
2051         flow->drop = 1;
2052         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2053                         parser->queue[HASH_RXQ_ETH].offset);
2054         *drop = (struct ibv_flow_spec_action_drop){
2055                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2056                         .size = size,
2057         };
2058         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2059         parser->queue[HASH_RXQ_ETH].offset += size;
2060         flow->frxq[HASH_RXQ_ETH].ibv_attr =
2061                 parser->queue[HASH_RXQ_ETH].ibv_attr;
2062         if (parser->count)
2063                 flow->cs = parser->cs;
2064         if (!priv->dev->data->dev_started)
2065                 return 0;
2066         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2067         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2068                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2069                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
2070         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2071                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2072                                    NULL, "flow rule creation failure");
2073                 goto error;
2074         }
2075         return 0;
2076 error:
2077         assert(flow);
2078         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2079                 claim_zero(mlx5_glue->destroy_flow
2080                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2081                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2082         }
2083         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2084                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2085                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2086         }
2087         if (flow->cs) {
2088                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2089                 flow->cs = NULL;
2090                 parser->cs = NULL;
2091         }
2092         return -rte_errno;
2093 }
2094
2095 /**
2096  * Create hash Rx queues when RSS is enabled.
2097  *
2098  * @param dev
2099  *   Pointer to Ethernet device.
2100  * @param parser
2101  *   Internal parser structure.
2102  * @param flow
2103  *   Pointer to the rte_flow.
2104  * @param[out] error
2105  *   Perform verbose error reporting if not NULL.
2106  *
2107  * @return
2108  *   0 on success, a negative errno value otherwise and rte_errno is set.
2109  */
2110 static int
2111 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2112                                   struct mlx5_flow_parse *parser,
2113                                   struct rte_flow *flow,
2114                                   struct rte_flow_error *error)
2115 {
2116         struct priv *priv = dev->data->dev_private;
2117         unsigned int i;
2118
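        /*
         * For every hash Rx queue type still holding a specification, reuse
         * an existing hash Rx queue matching the RSS configuration
         * (mlx5_hrxq_get()) or create a new one (mlx5_hrxq_new()).
         */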
2119         for (i = 0; i != hash_rxq_init_n; ++i) {
2120                 if (!parser->queue[i].ibv_attr)
2121                         continue;
2122                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2123                 parser->queue[i].ibv_attr = NULL;
2124                 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2125                 if (!priv->dev->data->dev_started)
2126                         continue;
2127                 flow->frxq[i].hrxq =
2128                         mlx5_hrxq_get(dev,
2129                                       parser->rss_conf.key,
2130                                       parser->rss_conf.key_len,
2131                                       flow->frxq[i].hash_fields,
2132                                       parser->rss_conf.queue,
2133                                       parser->rss_conf.queue_num,
2134                                       parser->tunnel,
2135                                       parser->rss_conf.level);
2136                 if (flow->frxq[i].hrxq)
2137                         continue;
2138                 flow->frxq[i].hrxq =
2139                         mlx5_hrxq_new(dev,
2140                                       parser->rss_conf.key,
2141                                       parser->rss_conf.key_len,
2142                                       flow->frxq[i].hash_fields,
2143                                       parser->rss_conf.queue,
2144                                       parser->rss_conf.queue_num,
2145                                       parser->tunnel,
2146                                       parser->rss_conf.level);
2147                 if (!flow->frxq[i].hrxq) {
2148                         return rte_flow_error_set(error, ENOMEM,
2149                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
2150                                                   NULL,
2151                                                   "cannot create hash rxq");
2152                 }
2153         }
2154         return 0;
2155 }
2156
2157 /**
2158  * RXQ update after flow rule creation.
2159  *
2160  * @param dev
2161  *   Pointer to Ethernet device.
2162  * @param flow
2163  *   Pointer to the flow rule.
2164  */
2165 static void
2166 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2167 {
2168         struct priv *priv = dev->data->dev_private;
2169         unsigned int i;
2170         unsigned int j;
2171
2172         if (!dev->data->dev_started)
2173                 return;
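        /*
         * Propagate the mark flag to every Rx queue used by the flow and
         * maintain per-queue tunnel type reference counts; the tunnel ptype
         * reported to the application is only set while a single tunnel type
         * is active on the queue, as it would be ambiguous otherwise.
         */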
2174         for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2175                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2176                                                  [(*flow->queues)[i]];
2177                 struct mlx5_rxq_ctrl *rxq_ctrl =
2178                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2179                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2180
2181                 rxq_data->mark |= flow->mark;
2182                 if (!tunnel)
2183                         continue;
2184                 rxq_ctrl->tunnel_types[tunnel] += 1;
2185                 /* Clear the tunnel type if more than one tunnel type is set. */
2186                 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2187                         if (j == tunnel)
2188                                 continue;
2189                         if (rxq_ctrl->tunnel_types[j] > 0) {
2190                                 rxq_data->tunnel = 0;
2191                                 break;
2192                         }
2193                 }
2194                 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2195                         rxq_data->tunnel = flow->tunnel;
2196         }
2197 }
2198
2199 /**
2200  * Dump flow hash RX queue detail.
2201  *
2202  * @param dev
2203  *   Pointer to Ethernet device.
2204  * @param flow
2205  *   Pointer to the rte_flow.
2206  * @param hrxq_idx
2207  *   Hash RX queue index.
2208  */
2209 static void
2210 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2211                struct rte_flow *flow __rte_unused,
2212                unsigned int hrxq_idx __rte_unused)
2213 {
2214 #ifndef NDEBUG
2215         uintptr_t spec_ptr;
2216         uint16_t j;
2217         char buf[256];
2218         uint8_t off;
2219
2220         spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2221         for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2222              j++) {
2223                 struct ibv_flow_spec *spec = (void *)spec_ptr;
2224                 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2225                                spec->hdr.size);
2226                 spec_ptr += spec->hdr.size;
2227         }
2228         DRV_LOG(DEBUG,
2229                 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2230                 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2231                 " flags:%x, comp_mask:%x specs:%s",
2232                 dev->data->port_id, (void *)flow, hrxq_idx,
2233                 (void *)flow->frxq[hrxq_idx].hrxq,
2234                 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2235                 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2236                 flow->frxq[hrxq_idx].hash_fields |
2237                 (flow->tunnel &&
2238                  flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2239                 flow->rss_conf.queue_num,
2240                 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2241                 flow->frxq[hrxq_idx].ibv_attr->size,
2242                 flow->frxq[hrxq_idx].ibv_attr->priority,
2243                 flow->frxq[hrxq_idx].ibv_attr->type,
2244                 flow->frxq[hrxq_idx].ibv_attr->flags,
2245                 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2246                 buf);
2247 #endif
2248 }
2249
2250 /**
2251  * Complete flow rule creation.
2252  *
2253  * @param dev
2254  *   Pointer to Ethernet device.
2255  * @param parser
2256  *   Internal parser structure.
2257  * @param flow
2258  *   Pointer to the rte_flow.
2259  * @param[out] error
2260  *   Perform verbose error reporting if not NULL.
2261  *
2262  * @return
2263  *   0 on success, a negative errno value otherwise and rte_errno is set.
2264  */
2265 static int
2266 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2267                               struct mlx5_flow_parse *parser,
2268                               struct rte_flow *flow,
2269                               struct rte_flow_error *error)
2270 {
2271         struct priv *priv = dev->data->dev_private;
2272         int ret;
2273         unsigned int i;
2274         unsigned int flows_n = 0;
2275
2276         assert(priv->pd);
2277         assert(priv->ctx);
2278         assert(!parser->drop);
2279         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2280         if (ret)
2281                 goto error;
2282         if (parser->count)
2283                 flow->cs = parser->cs;
2284         if (!priv->dev->data->dev_started)
2285                 return 0;
2286         for (i = 0; i != hash_rxq_init_n; ++i) {
2287                 if (!flow->frxq[i].hrxq)
2288                         continue;
2289                 flow->frxq[i].ibv_flow =
2290                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2291                                                flow->frxq[i].ibv_attr);
2292                 mlx5_flow_dump(dev, flow, i);
2293                 if (!flow->frxq[i].ibv_flow) {
2294                         rte_flow_error_set(error, ENOMEM,
2295                                            RTE_FLOW_ERROR_TYPE_HANDLE,
2296                                            NULL, "flow rule creation failure");
2297                         goto error;
2298                 }
2299                 ++flows_n;
2300         }
2301         if (!flows_n) {
2302                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2303                                    NULL, "internal error in flow creation");
2304                 goto error;
2305         }
2306         mlx5_flow_create_update_rxqs(dev, flow);
2307         return 0;
2308 error:
2309         ret = rte_errno; /* Save rte_errno before cleanup. */
2310         assert(flow);
2311         for (i = 0; i != hash_rxq_init_n; ++i) {
2312                 if (flow->frxq[i].ibv_flow) {
2313                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2314
2315                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2316                 }
2317                 if (flow->frxq[i].hrxq)
2318                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2319                 if (flow->frxq[i].ibv_attr)
2320                         rte_free(flow->frxq[i].ibv_attr);
2321         }
2322         if (flow->cs) {
2323                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2324                 flow->cs = NULL;
2325                 parser->cs = NULL;
2326         }
2327         rte_errno = ret; /* Restore rte_errno. */
2328         return -rte_errno;
2329 }
2330
2331 /**
2332  * Create a flow and add it to the list.
2333  *
2334  * @param dev
2335  *   Pointer to Ethernet device.
2336  * @param list
2337  *   Pointer to a TAILQ flow list.
2338  * @param[in] attr
2339  *   Flow rule attributes.
2340  * @param[in] pattern
2341  *   Pattern specification (list terminated by the END pattern item).
2342  * @param[in] actions
2343  *   Associated actions (list terminated by the END action).
2344  * @param[out] error
2345  *   Perform verbose error reporting if not NULL.
2346  *
2347  * @return
2348  *   A flow on success, NULL otherwise and rte_errno is set.
2349  */
2350 static struct rte_flow *
2351 mlx5_flow_list_create(struct rte_eth_dev *dev,
2352                       struct mlx5_flows *list,
2353                       const struct rte_flow_attr *attr,
2354                       const struct rte_flow_item items[],
2355                       const struct rte_flow_action actions[],
2356                       struct rte_flow_error *error)
2357 {
2358         struct mlx5_flow_parse parser = { .create = 1, };
2359         struct rte_flow *flow = NULL;
2360         unsigned int i;
2361         int ret;
2362
2363         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2364         if (ret)
2365                 goto exit;
2366         flow = rte_calloc(__func__, 1,
2367                           sizeof(*flow) +
2368                           parser.rss_conf.queue_num * sizeof(uint16_t),
2369                           0);
2370         if (!flow) {
2371                 rte_flow_error_set(error, ENOMEM,
2372                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2373                                    NULL,
2374                                    "cannot allocate flow memory");
2375                 return NULL;
2376         }
2377         /* Copy configuration. */
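        /*
         * The queue array lives in the same allocation, right after the
         * rte_flow structure, so a single rte_free() releases both.
         */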
2378         flow->queues = (uint16_t (*)[])(flow + 1);
2379         flow->tunnel = parser.tunnel;
2380         flow->rss_conf = (struct rte_flow_action_rss){
2381                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2382                 .level = 0,
2383                 .types = parser.rss_conf.types,
2384                 .key_len = parser.rss_conf.key_len,
2385                 .queue_num = parser.rss_conf.queue_num,
2386                 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2387                               sizeof(*parser.rss_conf.key) *
2388                               parser.rss_conf.key_len),
2389                 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2390                                 sizeof(*parser.rss_conf.queue) *
2391                                 parser.rss_conf.queue_num),
2392         };
2393         flow->mark = parser.mark;
2394         /* Finalise the flow. */
2395         if (parser.drop)
2396                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2397                                                          error);
2398         else
2399                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2400         if (ret)
2401                 goto exit;
2402         TAILQ_INSERT_TAIL(list, flow, next);
2403         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2404                 (void *)flow);
2405         return flow;
2406 exit:
2407         DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2408                 error->message);
2409         for (i = 0; i != hash_rxq_init_n; ++i) {
2410                 if (parser.queue[i].ibv_attr)
2411                         rte_free(parser.queue[i].ibv_attr);
2412         }
2413         rte_free(flow);
2414         return NULL;
2415 }
2416
2417 /**
2418  * Validate a flow supported by the NIC.
2419  *
2420  * @see rte_flow_validate()
2421  * @see rte_flow_ops
2422  */
2423 int
2424 mlx5_flow_validate(struct rte_eth_dev *dev,
2425                    const struct rte_flow_attr *attr,
2426                    const struct rte_flow_item items[],
2427                    const struct rte_flow_action actions[],
2428                    struct rte_flow_error *error)
2429 {
2430         struct mlx5_flow_parse parser = { .create = 0, };
2431
2432         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2433 }
2434
2435 /**
2436  * Create a flow.
2437  *
2438  * @see rte_flow_create()
2439  * @see rte_flow_ops
2440  */
2441 struct rte_flow *
2442 mlx5_flow_create(struct rte_eth_dev *dev,
2443                  const struct rte_flow_attr *attr,
2444                  const struct rte_flow_item items[],
2445                  const struct rte_flow_action actions[],
2446                  struct rte_flow_error *error)
2447 {
2448         struct priv *priv = dev->data->dev_private;
2449
2450         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2451                                      error);
2452 }
2453
2454 /**
2455  * Destroy a flow in a list.
2456  *
2457  * @param dev
2458  *   Pointer to Ethernet device.
2459  * @param list
2460  *   Pointer to a TAILQ flow list.
2461  * @param[in] flow
2462  *   Flow to destroy.
2463  */
2464 static void
2465 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2466                        struct rte_flow *flow)
2467 {
2468         struct priv *priv = dev->data->dev_private;
2469         unsigned int i;
2470
2471         if (flow->drop || !dev->data->dev_started)
2472                 goto free;
2473         for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2474                 /* Update queue tunnel type. */
2475                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2476                                                  [(*flow->queues)[i]];
2477                 struct mlx5_rxq_ctrl *rxq_ctrl =
2478                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2479                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2480
2481                 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2482                 rxq_ctrl->tunnel_types[tunnel] -= 1;
2483                 if (!rxq_ctrl->tunnel_types[tunnel]) {
2484                         /* Update tunnel type. */
2485                         uint8_t j;
2486                         uint8_t types = 0;
2487                         uint8_t last;
2488
2489                         for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2490                                 if (rxq_ctrl->tunnel_types[j]) {
2491                                         types += 1;
2492                                         last = j;
2493                                 }
2494                         /* Keep the same type if more than one tunnel type is left. */
2495                         if (types == 1)
2496                                 rxq_data->tunnel = ptype_ext[last];
2497                         else if (types == 0)
2498                                 /* No tunnel type left. */
2499                                 rxq_data->tunnel = 0;
2500                 }
2501         }
2502         for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2503                 struct rte_flow *tmp;
2504                 int mark = 0;
2505
2506                 /*
2507                  * To remove the mark from the queue, the queue must not be
2508                  * present in any other marked flow (RSS or not).
2509                  */
2510                 TAILQ_FOREACH(tmp, list, next) {
2511                         unsigned int j;
2512                         uint16_t *tqs = NULL;
2513                         uint16_t tq_n = 0;
2514
2515                         if (!tmp->mark)
2516                                 continue;
2517                         for (j = 0; j != hash_rxq_init_n; ++j) {
2518                                 if (!tmp->frxq[j].hrxq)
2519                                         continue;
2520                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2521                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2522                         }
2523                         if (!tq_n)
2524                                 continue;
2525                         for (j = 0; (j != tq_n) && !mark; j++)
2526                                 if (tqs[j] == (*flow->queues)[i])
2527                                         mark = 1;
2528                 }
2529                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2530         }
2531 free:
2532         if (flow->drop) {
2533                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2534                         claim_zero(mlx5_glue->destroy_flow
2535                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2536                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2537         } else {
2538                 for (i = 0; i != hash_rxq_init_n; ++i) {
2539                         struct mlx5_flow *frxq = &flow->frxq[i];
2540
2541                         if (frxq->ibv_flow)
2542                                 claim_zero(mlx5_glue->destroy_flow
2543                                            (frxq->ibv_flow));
2544                         if (frxq->hrxq)
2545                                 mlx5_hrxq_release(dev, frxq->hrxq);
2546                         if (frxq->ibv_attr)
2547                                 rte_free(frxq->ibv_attr);
2548                 }
2549         }
2550         if (flow->cs) {
2551                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2552                 flow->cs = NULL;
2553         }
2554         TAILQ_REMOVE(list, flow, next);
2555         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2556                 (void *)flow);
2557         rte_free(flow);
2558 }
2559
2560 /**
2561  * Destroy all flows.
2562  *
2563  * @param dev
2564  *   Pointer to Ethernet device.
2565  * @param list
2566  *   Pointer to a TAILQ flow list.
2567  */
2568 void
2569 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2570 {
2571         while (!TAILQ_EMPTY(list)) {
2572                 struct rte_flow *flow;
2573
2574                 flow = TAILQ_FIRST(list);
2575                 mlx5_flow_list_destroy(dev, list, flow);
2576         }
2577 }
2578
2579 /**
2580  * Create drop queue.
2581  *
2582  * @param dev
2583  *   Pointer to Ethernet device.
2584  *
2585  * @return
2586  *   0 on success, a negative errno value otherwise and rte_errno is set.
2587  */
2588 int
2589 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2590 {
2591         struct priv *priv = dev->data->dev_private;
2592         struct mlx5_hrxq_drop *fdq = NULL;
2593
2594         assert(priv->pd);
2595         assert(priv->ctx);
2596         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2597         if (!fdq) {
2598                 DRV_LOG(WARNING,
2599                         "port %u cannot allocate memory for drop queue",
2600                         dev->data->port_id);
2601                 rte_errno = ENOMEM;
2602                 return -rte_errno;
2603         }
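        /*
         * Verbs objects for the drop queue are created in dependency order:
         * CQ first, then WQ, the RWQ indirection table and finally the
         * hashed QP that references them all.
         */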
2604         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2605         if (!fdq->cq) {
2606                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2607                         dev->data->port_id);
2608                 rte_errno = errno;
2609                 goto error;
2610         }
2611         fdq->wq = mlx5_glue->create_wq
2612                 (priv->ctx,
2613                  &(struct ibv_wq_init_attr){
2614                         .wq_type = IBV_WQT_RQ,
2615                         .max_wr = 1,
2616                         .max_sge = 1,
2617                         .pd = priv->pd,
2618                         .cq = fdq->cq,
2619                  });
2620         if (!fdq->wq) {
2621                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2622                         dev->data->port_id);
2623                 rte_errno = errno;
2624                 goto error;
2625         }
2626         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2627                 (priv->ctx,
2628                  &(struct ibv_rwq_ind_table_init_attr){
2629                         .log_ind_tbl_size = 0,
2630                         .ind_tbl = &fdq->wq,
2631                         .comp_mask = 0,
2632                  });
2633         if (!fdq->ind_table) {
2634                 DRV_LOG(WARNING,
2635                         "port %u cannot allocate indirection table for drop"
2636                         " queue",
2637                         dev->data->port_id);
2638                 rte_errno = errno;
2639                 goto error;
2640         }
2641         fdq->qp = mlx5_glue->create_qp_ex
2642                 (priv->ctx,
2643                  &(struct ibv_qp_init_attr_ex){
2644                         .qp_type = IBV_QPT_RAW_PACKET,
2645                         .comp_mask =
2646                                 IBV_QP_INIT_ATTR_PD |
2647                                 IBV_QP_INIT_ATTR_IND_TABLE |
2648                                 IBV_QP_INIT_ATTR_RX_HASH,
2649                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2650                                 .rx_hash_function =
2651                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2652                                 .rx_hash_key_len = rss_hash_default_key_len,
2653                                 .rx_hash_key = rss_hash_default_key,
2654                                 .rx_hash_fields_mask = 0,
2655                                 },
2656                         .rwq_ind_tbl = fdq->ind_table,
2657                         .pd = priv->pd
2658                  });
2659         if (!fdq->qp) {
2660                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2661                         dev->data->port_id);
2662                 rte_errno = errno;
2663                 goto error;
2664         }
2665         priv->flow_drop_queue = fdq;
2666         return 0;
2667 error:
2668         if (fdq->qp)
2669                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2670         if (fdq->ind_table)
2671                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2672         if (fdq->wq)
2673                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2674         if (fdq->cq)
2675                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2676         if (fdq)
2677                 rte_free(fdq);
2678         priv->flow_drop_queue = NULL;
2679         return -rte_errno;
2680 }
2681
2682 /**
2683  * Delete drop queue.
2684  *
2685  * @param dev
2686  *   Pointer to Ethernet device.
2687  */
2688 void
2689 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2690 {
2691         struct priv *priv = dev->data->dev_private;
2692         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2693
2694         if (!fdq)
2695                 return;
2696         if (fdq->qp)
2697                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2698         if (fdq->ind_table)
2699                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2700         if (fdq->wq)
2701                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2702         if (fdq->cq)
2703                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2704         rte_free(fdq);
2705         priv->flow_drop_queue = NULL;
2706 }
2707
2708 /**
2709  * Remove all flows from the NIC, keeping them in the list so they can be re-applied.
2710  *
2711  * @param dev
2712  *   Pointer to Ethernet device.
2713  * @param list
2714  *   Pointer to a TAILQ flow list.
2715  */
2716 void
2717 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2718 {
2719         struct priv *priv = dev->data->dev_private;
2720         struct rte_flow *flow;
2721         unsigned int i;
2722
2723         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2724                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2725
2726                 if (flow->drop) {
2727                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2728                                 continue;
2729                         claim_zero(mlx5_glue->destroy_flow
2730                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2731                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2732                         DRV_LOG(DEBUG, "port %u flow %p removed",
2733                                 dev->data->port_id, (void *)flow);
2734                         /* Next flow. */
2735                         continue;
2736                 }
2737                 /* Verify the flow has not already been cleaned. */
2738                 for (i = 0; i != hash_rxq_init_n; ++i) {
2739                         if (!flow->frxq[i].ibv_flow)
2740                                 continue;
2741                         /*
2742                          * The indirection table may be needed to clear
2743                          * the mark flag in the Rx queues.
2744                          * Keeping a reference here avoids scanning the
2745                          * flow again in a second loop.
2746                          */
2747                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2748                         break;
2749                 }
2750                 if (i == hash_rxq_init_n)
2751                         return;
2752                 if (flow->mark) {
2753                         assert(ind_tbl);
2754                         for (i = 0; i != ind_tbl->queues_n; ++i)
2755                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2756                 }
2757                 for (i = 0; i != hash_rxq_init_n; ++i) {
2758                         if (!flow->frxq[i].ibv_flow)
2759                                 continue;
2760                         claim_zero(mlx5_glue->destroy_flow
2761                                    (flow->frxq[i].ibv_flow));
2762                         flow->frxq[i].ibv_flow = NULL;
2763                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2764                         flow->frxq[i].hrxq = NULL;
2765                 }
2766                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2767                         (void *)flow);
2768         }
2769         /* Cleanup Rx queue tunnel info. */
2770         for (i = 0; i != priv->rxqs_n; ++i) {
2771                 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2772                 struct mlx5_rxq_ctrl *rxq_ctrl =
2773                         container_of(q, struct mlx5_rxq_ctrl, rxq);
2774
2775                 if (!q)
2776                         continue;
2777                 memset((void *)rxq_ctrl->tunnel_types, 0,
2778                        sizeof(rxq_ctrl->tunnel_types));
2779                 q->tunnel = 0;
2780         }
2781 }
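
/*
 * Implementation note (editorial): mlx5_flow_stop() only detaches flows from
 * the hardware; the Verbs attributes (ibv_attr) stay allocated so that
 * mlx5_flow_start() below can re-create the hash Rx queues and re-apply the
 * same flows when the port is restarted.
 */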
2782
2783 /**
2784  * Apply all flows in the list to the NIC.
2785  *
2786  * @param dev
2787  *   Pointer to Ethernet device.
2788  * @param list
2789  *   Pointer to a TAILQ flow list.
2790  *
2791  * @return
2792  *   0 on success, a negative errno value otherwise and rte_errno is set.
2793  */
2794 int
2795 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2796 {
2797         struct priv *priv = dev->data->dev_private;
2798         struct rte_flow *flow;
2799
2800         TAILQ_FOREACH(flow, list, next) {
2801                 unsigned int i;
2802
2803                 if (flow->drop) {
2804                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2805                                 mlx5_glue->create_flow
2806                                 (priv->flow_drop_queue->qp,
2807                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2808                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2809                                 DRV_LOG(DEBUG,
2810                                         "port %u flow %p cannot be applied",
2811                                         dev->data->port_id, (void *)flow);
2812                                 rte_errno = EINVAL;
2813                                 return -rte_errno;
2814                         }
2815                         DRV_LOG(DEBUG, "port %u flow %p applied",
2816                                 dev->data->port_id, (void *)flow);
2817                         /* Next flow. */
2818                         continue;
2819                 }
2820                 for (i = 0; i != hash_rxq_init_n; ++i) {
2821                         if (!flow->frxq[i].ibv_attr)
2822                                 continue;
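                        /*
                         * Reuse an existing hash Rx queue when one matches,
                         * otherwise create a new one below.
                         */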
2823                         flow->frxq[i].hrxq =
2824                                 mlx5_hrxq_get(dev, flow->rss_conf.key,
2825                                               flow->rss_conf.key_len,
2826                                               flow->frxq[i].hash_fields,
2827                                               flow->rss_conf.queue,
2828                                               flow->rss_conf.queue_num,
2829                                               flow->tunnel,
2830                                               flow->rss_conf.level);
2831                         if (flow->frxq[i].hrxq)
2832                                 goto flow_create;
2833                         flow->frxq[i].hrxq =
2834                                 mlx5_hrxq_new(dev, flow->rss_conf.key,
2835                                               flow->rss_conf.key_len,
2836                                               flow->frxq[i].hash_fields,
2837                                               flow->rss_conf.queue,
2838                                               flow->rss_conf.queue_num,
2839                                               flow->tunnel,
2840                                               flow->rss_conf.level);
2841                         if (!flow->frxq[i].hrxq) {
2842                                 DRV_LOG(DEBUG,
2843                                         "port %u flow %p cannot create hash"
2844                                         " rxq",
2845                                         dev->data->port_id, (void *)flow);
2846                                 rte_errno = EINVAL;
2847                                 return -rte_errno;
2848                         }
2849 flow_create:
2850                         mlx5_flow_dump(dev, flow, i);
2851                         flow->frxq[i].ibv_flow =
2852                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2853                                                        flow->frxq[i].ibv_attr);
2854                         if (!flow->frxq[i].ibv_flow) {
2855                                 DRV_LOG(DEBUG,
2856                                         "port %u flow %p type %u cannot be"
2857                                         " applied",
2858                                         dev->data->port_id, (void *)flow, i);
2859                                 rte_errno = EINVAL;
2860                                 return -rte_errno;
2861                         }
2862                 }
2863                 mlx5_flow_create_update_rxqs(dev, flow);
2864         }
2865         return 0;
2866 }
2867
2868 /**
2869  * Verify the flow list is empty.
2870  *
2871  * @param dev
2872  *   Pointer to Ethernet device.
2873  *
2874  * @return The number of flows not released.
2875  */
2876 int
2877 mlx5_flow_verify(struct rte_eth_dev *dev)
2878 {
2879         struct priv *priv = dev->data->dev_private;
2880         struct rte_flow *flow;
2881         int ret = 0;
2882
2883         TAILQ_FOREACH(flow, &priv->flows, next) {
2884                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2885                         dev->data->port_id, (void *)flow);
2886                 ++ret;
2887         }
2888         return ret;
2889 }
2890
2891 /**
2892  * Enable a control flow configured from the control plane.
2893  *
2894  * @param dev
2895  *   Pointer to Ethernet device.
2896  * @param eth_spec
2897  *   An Ethernet flow spec to apply.
2898  * @param eth_mask
2899  *   An Ethernet flow mask to apply.
2900  * @param vlan_spec
2901  *   A VLAN flow spec to apply.
2902  * @param vlan_mask
2903  *   A VLAN flow mask to apply.
2904  *
2905  * @return
2906  *   0 on success, a negative errno value otherwise and rte_errno is set.
2907  */
2908 int
2909 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2910                     struct rte_flow_item_eth *eth_spec,
2911                     struct rte_flow_item_eth *eth_mask,
2912                     struct rte_flow_item_vlan *vlan_spec,
2913                     struct rte_flow_item_vlan *vlan_mask)
2914 {
2915         struct priv *priv = dev->data->dev_private;
2916         const struct rte_flow_attr attr = {
2917                 .ingress = 1,
2918                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2919         };
2920         struct rte_flow_item items[] = {
2921                 {
2922                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2923                         .spec = eth_spec,
2924                         .last = NULL,
2925                         .mask = eth_mask,
2926                 },
2927                 {
2928                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2929                                 RTE_FLOW_ITEM_TYPE_END,
2930                         .spec = vlan_spec,
2931                         .last = NULL,
2932                         .mask = vlan_mask,
2933                 },
2934                 {
2935                         .type = RTE_FLOW_ITEM_TYPE_END,
2936                 },
2937         };
2938         uint16_t queue[priv->reta_idx_n];
2939         struct rte_flow_action_rss action_rss = {
2940                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2941                 .level = 0,
2942                 .types = priv->rss_conf.rss_hf,
2943                 .key_len = priv->rss_conf.rss_key_len,
2944                 .queue_num = priv->reta_idx_n,
2945                 .key = priv->rss_conf.rss_key,
2946                 .queue = queue,
2947         };
2948         struct rte_flow_action actions[] = {
2949                 {
2950                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2951                         .conf = &action_rss,
2952                 },
2953                 {
2954                         .type = RTE_FLOW_ACTION_TYPE_END,
2955                 },
2956         };
2957         struct rte_flow *flow;
2958         struct rte_flow_error error;
2959         unsigned int i;
2960
2961         if (!priv->reta_idx_n) {
2962                 rte_errno = EINVAL;
2963                 return -rte_errno;
2964         }
2965         for (i = 0; i != priv->reta_idx_n; ++i)
2966                 queue[i] = (*priv->reta_idx)[i];
2967         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2968                                      actions, &error);
2969         if (!flow)
2970                 return -rte_errno;
2971         return 0;
2972 }
2973
2974 /**
2975  * Enable a control flow configured from the control plane.
2976  *
2977  * @param dev
2978  *   Pointer to Ethernet device.
2979  * @param eth_spec
2980  *   An Ethernet flow spec to apply.
2981  * @param eth_mask
2982  *   An Ethernet flow mask to apply.
2983  *
2984  * @return
2985  *   0 on success, a negative errno value otherwise and rte_errno is set.
2986  */
2987 int
2988 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2989                struct rte_flow_item_eth *eth_spec,
2990                struct rte_flow_item_eth *eth_mask)
2991 {
2992         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2993 }
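
/*
 * Usage sketch (editorial illustration, not part of the driver): a caller
 * such as the traffic enabling path can install a broadcast control flow by
 * passing the same Ethernet address as both spec and mask:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 */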
2994
2995 /**
2996  * Destroy a flow.
2997  *
2998  * @see rte_flow_destroy()
2999  * @see rte_flow_ops
3000  */
3001 int
3002 mlx5_flow_destroy(struct rte_eth_dev *dev,
3003                   struct rte_flow *flow,
3004                   struct rte_flow_error *error __rte_unused)
3005 {
3006         struct priv *priv = dev->data->dev_private;
3007
3008         mlx5_flow_list_destroy(dev, &priv->flows, flow);
3009         return 0;
3010 }
3011
3012 /**
3013  * Destroy all flows.
3014  *
3015  * @see rte_flow_flush()
3016  * @see rte_flow_ops
3017  */
3018 int
3019 mlx5_flow_flush(struct rte_eth_dev *dev,
3020                 struct rte_flow_error *error __rte_unused)
3021 {
3022         struct priv *priv = dev->data->dev_private;
3023
3024         mlx5_flow_list_flush(dev, &priv->flows);
3025         return 0;
3026 }
3027
3028 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3029 /**
3030  * Query flow counter.
3031  *
3032  * @param cs
3033  *   The counter set to read.
3034  * @param query_count
3035  *   Returned data from the counter.
3036  *
3037  * @return
3038  *   0 on success, a negative errno value otherwise and rte_errno is set.
3039  */
3040 static int
3041 mlx5_flow_query_count(struct ibv_counter_set *cs,
3042                       struct mlx5_flow_counter_stats *counter_stats,
3043                       struct rte_flow_query_count *query_count,
3044                       struct rte_flow_error *error)
3045 {
3046         uint64_t counters[2];
3047         struct ibv_query_counter_set_attr query_cs_attr = {
3048                 .cs = cs,
3049                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3050         };
3051         struct ibv_counter_set_data query_out = {
3052                 .out = counters,
3053                 .outlen = 2 * sizeof(uint64_t),
3054         };
3055         int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3056
3057         if (err)
3058                 return rte_flow_error_set(error, err,
3059                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3060                                           NULL,
3061                                           "cannot read counter");
3062         query_count->hits_set = 1;
3063         query_count->bytes_set = 1;
3064         query_count->hits = counters[0] - counter_stats->hits;
3065         query_count->bytes = counters[1] - counter_stats->bytes;
3066         if (query_count->reset) {
3067                 counter_stats->hits = counters[0];
3068                 counter_stats->bytes = counters[1];
3069         }
3070         return 0;
3071 }
3072
3073 /**
3074  * Query a flow.
3075  *
3076  * @see rte_flow_query()
3077  * @see rte_flow_ops
3078  */
3079 int
3080 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3081                 struct rte_flow *flow,
3082                 enum rte_flow_action_type action __rte_unused,
3083                 void *data,
3084                 struct rte_flow_error *error)
3085 {
3086         if (flow->cs) {
3087                 int ret;
3088
3089                 ret = mlx5_flow_query_count(flow->cs,
3090                                             &flow->counter_stats,
3091                                             (struct rte_flow_query_count *)data,
3092                                             error);
3093                 if (ret)
3094                         return ret;
3095         } else {
3096                 return rte_flow_error_set(error, EINVAL,
3097                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3098                                           NULL,
3099                                           "no counter found for flow");
3100         }
3101         return 0;
3102 }
3103 #endif
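
/*
 * Usage sketch (editorial illustration): an application reads the counter
 * attached to a flow through rte_flow_query(), which in this release takes
 * the action as an enum:
 *
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_error error;
 *
 *	if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *			    &count, &error))
 *		printf("hits %" PRIu64 " bytes %" PRIu64 "\n",
 *		       count.hits, count.bytes);
 */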
3104
3105 /**
3106  * Enable or disable flow isolation mode.
3107  *
3108  * @see rte_flow_isolate()
3109  * @see rte_flow_ops
3110  */
3111 int
3112 mlx5_flow_isolate(struct rte_eth_dev *dev,
3113                   int enable,
3114                   struct rte_flow_error *error)
3115 {
3116         struct priv *priv = dev->data->dev_private;
3117
3118         if (dev->data->dev_started) {
3119                 rte_flow_error_set(error, EBUSY,
3120                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3121                                    NULL,
3122                                    "port must be stopped first");
3123                 return -rte_errno;
3124         }
3125         priv->isolated = !!enable;
3126         if (enable)
3127                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3128         else
3129                 priv->dev->dev_ops = &mlx5_dev_ops;
3130         return 0;
3131 }
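
/*
 * Usage sketch (editorial illustration): isolated mode is toggled through
 * the generic API while the port is stopped, e.g.
 *
 *	struct rte_flow_error error;
 *
 *	rte_eth_dev_stop(port_id);
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n", error.message);
 */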
3132
3133 /**
3134  * Convert a flow director filter to a generic flow.
3135  *
3136  * @param dev
3137  *   Pointer to Ethernet device.
3138  * @param fdir_filter
3139  *   Flow director filter to add.
3140  * @param attributes
3141  *   Generic flow parameters structure.
3142  *
3143  * @return
3144  *   0 on success, a negative errno value otherwise and rte_errno is set.
3145  */
3146 static int
3147 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3148                          const struct rte_eth_fdir_filter *fdir_filter,
3149                          struct mlx5_fdir *attributes)
3150 {
3151         struct priv *priv = dev->data->dev_private;
3152         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3153         const struct rte_eth_fdir_masks *mask =
3154                 &dev->data->dev_conf.fdir_conf.mask;
3155
3156         /* Validate queue number. */
3157         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3158                 DRV_LOG(ERR, "port %u invalid queue number %d",
3159                         dev->data->port_id, fdir_filter->action.rx_queue);
3160                 rte_errno = EINVAL;
3161                 return -rte_errno;
3162         }
3163         attributes->attr.ingress = 1;
3164         attributes->items[0] = (struct rte_flow_item) {
3165                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3166                 .spec = &attributes->l2,
3167                 .mask = &attributes->l2_mask,
3168         };
3169         switch (fdir_filter->action.behavior) {
3170         case RTE_ETH_FDIR_ACCEPT:
3171                 attributes->actions[0] = (struct rte_flow_action){
3172                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3173                         .conf = &attributes->queue,
3174                 };
3175                 break;
3176         case RTE_ETH_FDIR_REJECT:
3177                 attributes->actions[0] = (struct rte_flow_action){
3178                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3179                 };
3180                 break;
3181         default:
3182                 DRV_LOG(ERR, "port %u invalid behavior %d",
3183                         dev->data->port_id,
3184                         fdir_filter->action.behavior);
3185                 rte_errno = ENOTSUP;
3186                 return -rte_errno;
3187         }
3188         attributes->queue.index = fdir_filter->action.rx_queue;
3189         /* Handle L3. */
3190         switch (fdir_filter->input.flow_type) {
3191         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3192         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3193         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3194                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3195                         .src_addr = input->flow.ip4_flow.src_ip,
3196                         .dst_addr = input->flow.ip4_flow.dst_ip,
3197                         .time_to_live = input->flow.ip4_flow.ttl,
3198                         .type_of_service = input->flow.ip4_flow.tos,
3199                         .next_proto_id = input->flow.ip4_flow.proto,
3200                 };
3201                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3202                         .src_addr = mask->ipv4_mask.src_ip,
3203                         .dst_addr = mask->ipv4_mask.dst_ip,
3204                         .time_to_live = mask->ipv4_mask.ttl,
3205                         .type_of_service = mask->ipv4_mask.tos,
3206                         .next_proto_id = mask->ipv4_mask.proto,
3207                 };
3208                 attributes->items[1] = (struct rte_flow_item){
3209                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3210                         .spec = &attributes->l3,
3211                         .mask = &attributes->l3_mask,
3212                 };
3213                 break;
3214         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3215         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3216         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3217                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3218                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3219                         .proto = input->flow.ipv6_flow.proto,
3220                 };
3221
3222                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3223                        input->flow.ipv6_flow.src_ip,
3224                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3225                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3226                        input->flow.ipv6_flow.dst_ip,
3227                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3228                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3229                        mask->ipv6_mask.src_ip,
3230                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3231                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3232                        mask->ipv6_mask.dst_ip,
3233                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3234                 attributes->items[1] = (struct rte_flow_item){
3235                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3236                         .spec = &attributes->l3,
3237                         .mask = &attributes->l3_mask,
3238                 };
3239                 break;
3240         default:
3241                 DRV_LOG(ERR, "port %u invalid flow type %d",
3242                         dev->data->port_id, fdir_filter->input.flow_type);
3243                 rte_errno = ENOTSUP;
3244                 return -rte_errno;
3245         }
3246         /* Handle L4. */
3247         switch (fdir_filter->input.flow_type) {
3248         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3249                 attributes->l4.udp.hdr = (struct udp_hdr){
3250                         .src_port = input->flow.udp4_flow.src_port,
3251                         .dst_port = input->flow.udp4_flow.dst_port,
3252                 };
3253                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3254                         .src_port = mask->src_port_mask,
3255                         .dst_port = mask->dst_port_mask,
3256                 };
3257                 attributes->items[2] = (struct rte_flow_item){
3258                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3259                         .spec = &attributes->l4,
3260                         .mask = &attributes->l4_mask,
3261                 };
3262                 break;
3263         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3264                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3265                         .src_port = input->flow.tcp4_flow.src_port,
3266                         .dst_port = input->flow.tcp4_flow.dst_port,
3267                 };
3268                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3269                         .src_port = mask->src_port_mask,
3270                         .dst_port = mask->dst_port_mask,
3271                 };
3272                 attributes->items[2] = (struct rte_flow_item){
3273                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3274                         .spec = &attributes->l4,
3275                         .mask = &attributes->l4_mask,
3276                 };
3277                 break;
3278         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3279                 attributes->l4.udp.hdr = (struct udp_hdr){
3280                         .src_port = input->flow.udp6_flow.src_port,
3281                         .dst_port = input->flow.udp6_flow.dst_port,
3282                 };
3283                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3284                         .src_port = mask->src_port_mask,
3285                         .dst_port = mask->dst_port_mask,
3286                 };
3287                 attributes->items[2] = (struct rte_flow_item){
3288                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3289                         .spec = &attributes->l4,
3290                         .mask = &attributes->l4_mask,
3291                 };
3292                 break;
3293         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3294                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3295                         .src_port = input->flow.tcp6_flow.src_port,
3296                         .dst_port = input->flow.tcp6_flow.dst_port,
3297                 };
3298                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3299                         .src_port = mask->src_port_mask,
3300                         .dst_port = mask->dst_port_mask,
3301                 };
3302                 attributes->items[2] = (struct rte_flow_item){
3303                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3304                         .spec = &attributes->l4,
3305                         .mask = &attributes->l4_mask,
3306                 };
3307                 break;
3308         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3309         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3310                 break;
3311         default:
3312                 DRV_LOG(ERR, "port %u invalid flow type %d",
3313                         dev->data->port_id, fdir_filter->input.flow_type);
3314                 rte_errno = ENOTSUP;
3315                 return -rte_errno;
3316         }
3317         return 0;
3318 }
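
/*
 * For reference (editorial note): an RTE_ETH_FLOW_NONFRAG_IPV4_UDP perfect
 * filter accepted by the function above is equivalent to the generic flow
 *
 *	pattern: ETH / IPV4 (src, dst, tos, ttl, proto) / UDP (ports) / END
 *	actions: QUEUE index = fdir_filter->action.rx_queue, or DROP / END
 */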
3319
3320 /**
3321  * Add new flow director filter and store it in list.
3322  *
3323  * @param dev
3324  *   Pointer to Ethernet device.
3325  * @param fdir_filter
3326  *   Flow director filter to add.
3327  *
3328  * @return
3329  *   0 on success, a negative errno value otherwise and rte_errno is set.
3330  */
3331 static int
3332 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3333                      const struct rte_eth_fdir_filter *fdir_filter)
3334 {
3335         struct priv *priv = dev->data->dev_private;
3336         struct mlx5_fdir attributes = {
3337                 .attr.group = 0,
3338                 .l2_mask = {
3339                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3340                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3341                         .type = 0,
3342                 },
3343         };
3344         struct mlx5_flow_parse parser = {
3345                 .layer = HASH_RXQ_ETH,
3346         };
3347         struct rte_flow_error error;
3348         struct rte_flow *flow;
3349         int ret;
3350
3351         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3352         if (ret)
3353                 return ret;
3354         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3355                                 attributes.actions, &error, &parser);
3356         if (ret)
3357                 return ret;
3358         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3359                                      attributes.items, attributes.actions,
3360                                      &error);
3361         if (flow) {
3362                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3363                         (void *)flow);
3364                 return 0;
3365         }
3366         return -rte_errno;
3367 }
3368
3369 /**
3370  * Delete specific filter.
3371  *
3372  * @param dev
3373  *   Pointer to Ethernet device.
3374  * @param fdir_filter
3375  *   Filter to be deleted.
3376  *
3377  * @return
3378  *   0 on success, a negative errno value otherwise and rte_errno is set.
3379  */
3380 static int
3381 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3382                         const struct rte_eth_fdir_filter *fdir_filter)
3383 {
3384         struct priv *priv = dev->data->dev_private;
3385         struct mlx5_fdir attributes = {
3386                 .attr.group = 0,
3387         };
3388         struct mlx5_flow_parse parser = {
3389                 .create = 1,
3390                 .layer = HASH_RXQ_ETH,
3391         };
3392         struct rte_flow_error error;
3393         struct rte_flow *flow;
3394         unsigned int i;
3395         int ret;
3396
3397         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3398         if (ret)
3399                 return ret;
3400         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3401                                 attributes.actions, &error, &parser);
3402         if (ret)
3403                 goto exit;
3404         /*
3405          * Special case for the drop action: it is only added to the
3406          * specifications when a flow is created, so it is missing here
3407          * and must be appended before comparing with existing flows.
3408          */
3409         if (parser.drop) {
3410                 struct ibv_flow_spec_action_drop *drop;
3411
3412                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3413                                 parser.queue[HASH_RXQ_ETH].offset);
3414                 *drop = (struct ibv_flow_spec_action_drop){
3415                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3416                         .size = sizeof(struct ibv_flow_spec_action_drop),
3417                 };
3418                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3419         }
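        /*
         * Look for an installed flow whose Verbs attributes and every
         * specification header match the converted filter.
         */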
3420         TAILQ_FOREACH(flow, &priv->flows, next) {
3421                 struct ibv_flow_attr *attr;
3422                 struct ibv_spec_header *attr_h;
3423                 void *spec;
3424                 struct ibv_flow_attr *flow_attr;
3425                 struct ibv_spec_header *flow_h;
3426                 void *flow_spec;
3427                 unsigned int specs_n;
3428
3429                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3430                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3431                 /* Compare first the attributes. */
3432                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3433                         continue;
3434                 if (attr->num_of_specs == 0)
3435                         continue;
3436                 spec = (void *)((uintptr_t)attr +
3437                                 sizeof(struct ibv_flow_attr));
3438                 flow_spec = (void *)((uintptr_t)flow_attr +
3439                                      sizeof(struct ibv_flow_attr));
3440                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3441                 for (i = 0; i != specs_n; ++i) {
3442                         attr_h = spec;
3443                         flow_h = flow_spec;
3444                         if (memcmp(spec, flow_spec,
3445                                    RTE_MIN(attr_h->size, flow_h->size)))
3446                                 goto wrong_flow;
3447                         spec = (void *)((uintptr_t)spec + attr_h->size);
3448                         flow_spec = (void *)((uintptr_t)flow_spec +
3449                                              flow_h->size);
3450                 }
3451                 /* At this point, the flow matches. */
3452                 break;
3453 wrong_flow:
3454                 /* The flow does not match. */
3455                 continue;
3456         }
3457         ret = rte_errno; /* Save rte_errno before cleanup. */
3458         if (flow)
3459                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3460 exit:
3461         for (i = 0; i != hash_rxq_init_n; ++i) {
3462                 if (parser.queue[i].ibv_attr)
3463                         rte_free(parser.queue[i].ibv_attr);
3464         }
3465         rte_errno = ret; /* Restore rte_errno. */
3466         return -rte_errno;
3467 }
3468
3469 /**
3470  * Update a specific filter (delete it, then re-add it).
3471  *
3472  * @param dev
3473  *   Pointer to Ethernet device.
3474  * @param fdir_filter
3475  *   Filter to be updated.
3476  *
3477  * @return
3478  *   0 on success, a negative errno value otherwise and rte_errno is set.
3479  */
3480 static int
3481 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3482                         const struct rte_eth_fdir_filter *fdir_filter)
3483 {
3484         int ret;
3485
3486         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3487         if (ret)
3488                 return ret;
3489         return mlx5_fdir_filter_add(dev, fdir_filter);
3490 }
3491
3492 /**
3493  * Flush all filters.
3494  *
3495  * @param dev
3496  *   Pointer to Ethernet device.
3497  */
3498 static void
3499 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3500 {
3501         struct priv *priv = dev->data->dev_private;
3502
3503         mlx5_flow_list_flush(dev, &priv->flows);
3504 }
3505
3506 /**
3507  * Get flow director information.
3508  *
3509  * @param dev
3510  *   Pointer to Ethernet device.
3511  * @param[out] fdir_info
3512  *   Resulting flow director information.
3513  */
3514 static void
3515 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3516 {
3517         struct priv *priv = dev->data->dev_private;
3518         struct rte_eth_fdir_masks *mask =
3519                 &priv->dev->data->dev_conf.fdir_conf.mask;
3520
3521         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3522         fdir_info->guarant_spc = 0;
3523         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3524         fdir_info->max_flexpayload = 0;
3525         fdir_info->flow_types_mask[0] = 0;
3526         fdir_info->flex_payload_unit = 0;
3527         fdir_info->max_flex_payload_segment_num = 0;
3528         fdir_info->flex_payload_limit = 0;
3529         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3530 }
3531
3532 /**
3533  * Deal with flow director operations.
3534  *
3535  * @param dev
3536  *   Pointer to Ethernet device.
3537  * @param filter_op
3538  *   Operation to perform.
3539  * @param arg
3540  *   Pointer to operation-specific structure.
3541  *
3542  * @return
3543  *   0 on success, a negative errno value otherwise and rte_errno is set.
3544  */
3545 static int
3546 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3547                     void *arg)
3548 {
3549         struct priv *priv = dev->data->dev_private;
3550         enum rte_fdir_mode fdir_mode =
3551                 priv->dev->data->dev_conf.fdir_conf.mode;
3552
3553         if (filter_op == RTE_ETH_FILTER_NOP)
3554                 return 0;
3555         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3556             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3557                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3558                         dev->data->port_id, fdir_mode);
3559                 rte_errno = EINVAL;
3560                 return -rte_errno;
3561         }
3562         switch (filter_op) {
3563         case RTE_ETH_FILTER_ADD:
3564                 return mlx5_fdir_filter_add(dev, arg);
3565         case RTE_ETH_FILTER_UPDATE:
3566                 return mlx5_fdir_filter_update(dev, arg);
3567         case RTE_ETH_FILTER_DELETE:
3568                 return mlx5_fdir_filter_delete(dev, arg);
3569         case RTE_ETH_FILTER_FLUSH:
3570                 mlx5_fdir_filter_flush(dev);
3571                 break;
3572         case RTE_ETH_FILTER_INFO:
3573                 mlx5_fdir_info_get(dev, arg);
3574                 break;
3575         default:
3576                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3577                         dev->data->port_id, filter_op);
3578                 rte_errno = EINVAL;
3579                 return -rte_errno;
3580         }
3581         return 0;
3582 }
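
/*
 * Usage sketch (editorial illustration): applications reach this handler
 * through the legacy filter control API, e.g.
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.action.rx_queue = 1,
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &filter);
 */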
3583
3584 /**
3585  * Manage filter operations.
3586  *
3587  * @param dev
3588  *   Pointer to Ethernet device structure.
3589  * @param filter_type
3590  *   Filter type.
3591  * @param filter_op
3592  *   Operation to perform.
3593  * @param arg
3594  *   Pointer to operation-specific structure.
3595  *
3596  * @return
3597  *   0 on success, a negative errno value otherwise and rte_errno is set.
3598  */
3599 int
3600 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3601                      enum rte_filter_type filter_type,
3602                      enum rte_filter_op filter_op,
3603                      void *arg)
3604 {
3605         switch (filter_type) {
3606         case RTE_ETH_FILTER_GENERIC:
3607                 if (filter_op != RTE_ETH_FILTER_GET) {
3608                         rte_errno = EINVAL;
3609                         return -rte_errno;
3610                 }
3611                 *(const void **)arg = &mlx5_flow_ops;
3612                 return 0;
3613         case RTE_ETH_FILTER_FDIR:
3614                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3615         default:
3616                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3617                         dev->data->port_id, filter_type);
3618                 rte_errno = ENOTSUP;
3619                 return -rte_errno;
3620         }
3621         return 0;
3622 }
3623
3624 /**
3625  * Detect number of Verbs flow priorities supported.
3626  *
3627  * @param dev
3628  *   Pointer to Ethernet device.
3629  *
3630  * @return
3631  *   Number of supported Verbs flow priorities.
3632  */
3633 unsigned int
3634 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3635 {
3636         struct priv *priv = dev->data->dev_private;
3637         unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3638         struct {
3639                 struct ibv_flow_attr attr;
3640                 struct ibv_flow_spec_eth eth;
3641                 struct ibv_flow_spec_action_drop drop;
3642         } flow_attr = {
3643                 .attr = {
3644                         .num_of_specs = 2,
3645                 },
3646                 .eth = {
3647                         .type = IBV_FLOW_SPEC_ETH,
3648                         .size = sizeof(struct ibv_flow_spec_eth),
3649                 },
3650                 .drop = {
3651                         .size = sizeof(struct ibv_flow_spec_action_drop),
3652                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3653                 },
3654         };
3655         struct ibv_flow *flow;
3656
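        /*
         * Probe by doubling: create a drop flow at the highest priority of
         * the current guess; double the guess on success, halve it back and
         * stop on the first failure.
         */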
3657         do {
3658                 flow_attr.attr.priority = verb_priorities - 1;
3659                 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3660                                               &flow_attr.attr);
3661                 if (flow) {
3662                         claim_zero(mlx5_glue->destroy_flow(flow));
3663                         /* Try more priorities. */
3664                         verb_priorities *= 2;
3665                 } else {
3666                         /* Creation failed, fall back to the last working value. */
3667                         verb_priorities /= 2;
3668                         break;
3669                 }
3670         } while (1);
3671         DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3672                 " user flow priorities: %d",
3673                 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3674         return verb_priorities;
3675 }