dpdk.git / drivers/net/mlx5/mlx5_flow.c @ a886f7eb1b38da54d1838b947b058e3359606466
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
36
37 /* Internet Protocol versions. */
38 #define MLX5_IPV4 4
39 #define MLX5_IPV6 6
40 #define MLX5_GRE 47
41
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44         int dummy;
45 };
46 #endif
47
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54         struct rte_eth_dev *dev; /**< Ethernet device. */
55         struct mlx5_flow_parse *parser; /**< Parser context. */
56         struct rte_flow_error *error; /**< Error context. */
57 };
58
59 static int
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61                      const void *default_mask,
62                      struct mlx5_flow_data *data);
63
64 static int
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66                       const void *default_mask,
67                       struct mlx5_flow_data *data);
68
69 static int
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71                       const void *default_mask,
72                       struct mlx5_flow_data *data);
73
74 static int
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76                       const void *default_mask,
77                       struct mlx5_flow_data *data);
78
79 static int
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81                      const void *default_mask,
82                      struct mlx5_flow_data *data);
83
84 static int
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86                      const void *default_mask,
87                      struct mlx5_flow_data *data);
88
89 static int
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91                        const void *default_mask,
92                        struct mlx5_flow_data *data);
93
94 static int
95 mlx5_flow_create_gre(const struct rte_flow_item *item,
96                      const void *default_mask,
97                      struct mlx5_flow_data *data);
98
99 struct mlx5_flow_parse;
100
101 static void
102 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
103                       unsigned int size);
104
105 static int
106 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
107
108 static int
109 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
110
111 /* Hash RX queue types. */
112 enum hash_rxq_type {
113         HASH_RXQ_TCPV4,
114         HASH_RXQ_UDPV4,
115         HASH_RXQ_IPV4,
116         HASH_RXQ_TCPV6,
117         HASH_RXQ_UDPV6,
118         HASH_RXQ_IPV6,
119         HASH_RXQ_ETH,
120         HASH_RXQ_TUNNEL,
121 };
122
123 /* Initialization data for hash RX queue. */
124 struct hash_rxq_init {
125         uint64_t hash_fields; /* Fields that participate in the hash. */
126         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
127         unsigned int flow_priority; /* Flow priority to use. */
128         unsigned int ip_version; /* Internet protocol. */
129 };
130
131 /* Initialization data for hash RX queues. */
132 const struct hash_rxq_init hash_rxq_init[] = {
133         [HASH_RXQ_TCPV4] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135                                 IBV_RX_HASH_DST_IPV4 |
136                                 IBV_RX_HASH_SRC_PORT_TCP |
137                                 IBV_RX_HASH_DST_PORT_TCP),
138                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
139                 .flow_priority = 0,
140                 .ip_version = MLX5_IPV4,
141         },
142         [HASH_RXQ_UDPV4] = {
143                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144                                 IBV_RX_HASH_DST_IPV4 |
145                                 IBV_RX_HASH_SRC_PORT_UDP |
146                                 IBV_RX_HASH_DST_PORT_UDP),
147                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
148                 .flow_priority = 0,
149                 .ip_version = MLX5_IPV4,
150         },
151         [HASH_RXQ_IPV4] = {
152                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
153                                 IBV_RX_HASH_DST_IPV4),
154                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
155                                 ETH_RSS_FRAG_IPV4),
156                 .flow_priority = 1,
157                 .ip_version = MLX5_IPV4,
158         },
159         [HASH_RXQ_TCPV6] = {
160                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161                                 IBV_RX_HASH_DST_IPV6 |
162                                 IBV_RX_HASH_SRC_PORT_TCP |
163                                 IBV_RX_HASH_DST_PORT_TCP),
164                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
165                 .flow_priority = 0,
166                 .ip_version = MLX5_IPV6,
167         },
168         [HASH_RXQ_UDPV6] = {
169                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170                                 IBV_RX_HASH_DST_IPV6 |
171                                 IBV_RX_HASH_SRC_PORT_UDP |
172                                 IBV_RX_HASH_DST_PORT_UDP),
173                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
174                 .flow_priority = 0,
175                 .ip_version = MLX5_IPV6,
176         },
177         [HASH_RXQ_IPV6] = {
178                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
179                                 IBV_RX_HASH_DST_IPV6),
180                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
181                                 ETH_RSS_FRAG_IPV6),
182                 .flow_priority = 1,
183                 .ip_version = MLX5_IPV6,
184         },
185         [HASH_RXQ_ETH] = {
186                 .hash_fields = 0,
187                 .dpdk_rss_hf = 0,
188                 .flow_priority = 2,
189         },
190 };
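/*
 * Lower flow_priority values take precedence in Verbs: fully specified L4
 * flows (priority 0) are matched before L3-only flows (priority 1), which in
 * turn are matched before the plain Ethernet catch-all (priority 2).
 */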
191
192 /* Number of entries in hash_rxq_init[]. */
193 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
194
195 /** Structure for holding counter stats. */
196 struct mlx5_flow_counter_stats {
197         uint64_t hits; /**< Number of packets matched by the rule. */
198         uint64_t bytes; /**< Number of bytes matched by the rule. */
199 };
200
201 /** Structure for Drop queue. */
202 struct mlx5_hrxq_drop {
203         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
204         struct ibv_qp *qp; /**< Verbs queue pair. */
205         struct ibv_wq *wq; /**< Verbs work queue. */
206         struct ibv_cq *cq; /**< Verbs completion queue. */
207 };
208
209 /* Flow structure. */
210 struct mlx5_flow {
211         uint64_t hash_fields; /**< Fields that participate in the hash. */
212         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
213         struct ibv_flow *ibv_flow; /**< Verbs flow. */
214         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
215 };
216
217 /* Drop flow structure. */
218 struct mlx5_flow_drop {
219         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
220         struct ibv_flow *ibv_flow; /**< Verbs flow. */
221 };
222
223 struct rte_flow {
224         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
225         uint32_t mark:1; /**< Set if the flow is marked. */
226         uint32_t drop:1; /**< Drop queue. */
227         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
228         uint16_t (*queues)[]; /**< Queues indexes to use. */
229         uint8_t rss_key[40]; /**< copy of the RSS key. */
230         uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
231         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
232         struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
233         struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
234         /**< Flow with Rx queue. */
235 };
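/*
 * A single rte_flow may be backed by several Verbs flows: one frxq[] entry is
 * kept per hash Rx queue type so RSS can span the protocol layers requested
 * by the RSS configuration.
 */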
236
237 /** Static initializer for items. */
238 #define ITEMS(...) \
239         (const enum rte_flow_item_type []){ \
240                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
241         }
242
243 #define IS_TUNNEL(type) ( \
244         (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
245         (type) == RTE_FLOW_ITEM_TYPE_GRE)
246
247 const uint32_t flow_ptype[] = {
248         [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
249         [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
250 };
251
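/*
 * Map an RTE_PTYPE_TUNNEL_* value to a small array index by extracting the
 * tunnel-type field of the packet type (bits 12-15).
 */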
252 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
253
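/*
 * Extended packet types associated with each tunnel; VXLAN also carries
 * RTE_PTYPE_L4_UDP since it is UDP-encapsulated.
 */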
254 const uint32_t ptype_ext[] = {
255         [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
256                                               RTE_PTYPE_L4_UDP,
257         [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
258 };
259
260 /** Structure to generate a simple graph of layers supported by the NIC. */
261 struct mlx5_flow_items {
262         /** List of possible actions for these items. */
263         const enum rte_flow_action_type *const actions;
264         /** Bit-masks corresponding to the possibilities for the item. */
265         const void *mask;
266         /**
267          * Default bit-masks to use when item->mask is not provided. When
268          * \default_mask is also NULL, the full supported bit-mask (\mask) is
269          * used instead.
270          */
271         const void *default_mask;
272         /** Bit-masks size in bytes. */
273         const unsigned int mask_sz;
274         /**
275          * Conversion function from rte_flow to NIC specific flow.
276          *
277          * @param item
278          *   rte_flow item to convert.
279          * @param default_mask
280          *   Default bit-masks to use when item->mask is not provided.
281          * @param data
282          *   Internal structure to store the conversion.
283          *
284          * @return
285          *   0 on success, a negative errno value otherwise and rte_errno is
286          *   set.
287          */
288         int (*convert)(const struct rte_flow_item *item,
289                        const void *default_mask,
290                        struct mlx5_flow_data *data);
291         /** Size in bytes of the destination structure. */
292         const unsigned int dst_sz;
293         /** List of possible following items.  */
294         const enum rte_flow_item_type *const items;
295 };
296
297 /** Valid actions for this PMD. */
298 static const enum rte_flow_action_type valid_actions[] = {
299         RTE_FLOW_ACTION_TYPE_DROP,
300         RTE_FLOW_ACTION_TYPE_QUEUE,
301         RTE_FLOW_ACTION_TYPE_MARK,
302         RTE_FLOW_ACTION_TYPE_FLAG,
303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
304         RTE_FLOW_ACTION_TYPE_COUNT,
305 #endif
306         RTE_FLOW_ACTION_TYPE_END,
307 };
308
309 /** Graph of supported items and associated actions. */
310 static const struct mlx5_flow_items mlx5_flow_items[] = {
311         [RTE_FLOW_ITEM_TYPE_END] = {
312                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
313                                RTE_FLOW_ITEM_TYPE_VXLAN,
314                                RTE_FLOW_ITEM_TYPE_GRE),
315         },
316         [RTE_FLOW_ITEM_TYPE_ETH] = {
317                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
318                                RTE_FLOW_ITEM_TYPE_IPV4,
319                                RTE_FLOW_ITEM_TYPE_IPV6),
320                 .actions = valid_actions,
321                 .mask = &(const struct rte_flow_item_eth){
322                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
323                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
324                         .type = -1,
325                 },
326                 .default_mask = &rte_flow_item_eth_mask,
327                 .mask_sz = sizeof(struct rte_flow_item_eth),
328                 .convert = mlx5_flow_create_eth,
329                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
330         },
331         [RTE_FLOW_ITEM_TYPE_VLAN] = {
332                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
333                                RTE_FLOW_ITEM_TYPE_IPV6),
334                 .actions = valid_actions,
335                 .mask = &(const struct rte_flow_item_vlan){
336                         .tci = -1,
337                         .inner_type = -1,
338                 },
339                 .default_mask = &rte_flow_item_vlan_mask,
340                 .mask_sz = sizeof(struct rte_flow_item_vlan),
341                 .convert = mlx5_flow_create_vlan,
342                 .dst_sz = 0,
343         },
344         [RTE_FLOW_ITEM_TYPE_IPV4] = {
345                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
346                                RTE_FLOW_ITEM_TYPE_TCP,
347                                RTE_FLOW_ITEM_TYPE_GRE),
348                 .actions = valid_actions,
349                 .mask = &(const struct rte_flow_item_ipv4){
350                         .hdr = {
351                                 .src_addr = -1,
352                                 .dst_addr = -1,
353                                 .type_of_service = -1,
354                                 .next_proto_id = -1,
355                         },
356                 },
357                 .default_mask = &rte_flow_item_ipv4_mask,
358                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
359                 .convert = mlx5_flow_create_ipv4,
360                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
361         },
362         [RTE_FLOW_ITEM_TYPE_IPV6] = {
363                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
364                                RTE_FLOW_ITEM_TYPE_TCP,
365                                RTE_FLOW_ITEM_TYPE_GRE),
366                 .actions = valid_actions,
367                 .mask = &(const struct rte_flow_item_ipv6){
368                         .hdr = {
369                                 .src_addr = {
370                                         0xff, 0xff, 0xff, 0xff,
371                                         0xff, 0xff, 0xff, 0xff,
372                                         0xff, 0xff, 0xff, 0xff,
373                                         0xff, 0xff, 0xff, 0xff,
374                                 },
375                                 .dst_addr = {
376                                         0xff, 0xff, 0xff, 0xff,
377                                         0xff, 0xff, 0xff, 0xff,
378                                         0xff, 0xff, 0xff, 0xff,
379                                         0xff, 0xff, 0xff, 0xff,
380                                 },
381                                 .vtc_flow = -1,
382                                 .proto = -1,
383                                 .hop_limits = -1,
384                         },
385                 },
386                 .default_mask = &rte_flow_item_ipv6_mask,
387                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
388                 .convert = mlx5_flow_create_ipv6,
389                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
390         },
391         [RTE_FLOW_ITEM_TYPE_UDP] = {
392                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
393                 .actions = valid_actions,
394                 .mask = &(const struct rte_flow_item_udp){
395                         .hdr = {
396                                 .src_port = -1,
397                                 .dst_port = -1,
398                         },
399                 },
400                 .default_mask = &rte_flow_item_udp_mask,
401                 .mask_sz = sizeof(struct rte_flow_item_udp),
402                 .convert = mlx5_flow_create_udp,
403                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
404         },
405         [RTE_FLOW_ITEM_TYPE_TCP] = {
406                 .actions = valid_actions,
407                 .mask = &(const struct rte_flow_item_tcp){
408                         .hdr = {
409                                 .src_port = -1,
410                                 .dst_port = -1,
411                         },
412                 },
413                 .default_mask = &rte_flow_item_tcp_mask,
414                 .mask_sz = sizeof(struct rte_flow_item_tcp),
415                 .convert = mlx5_flow_create_tcp,
416                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
417         },
418         [RTE_FLOW_ITEM_TYPE_GRE] = {
419                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
420                                RTE_FLOW_ITEM_TYPE_IPV4,
421                                RTE_FLOW_ITEM_TYPE_IPV6),
422                 .actions = valid_actions,
423                 .mask = &(const struct rte_flow_item_gre){
424                         .protocol = -1,
425                 },
426                 .default_mask = &rte_flow_item_gre_mask,
427                 .mask_sz = sizeof(struct rte_flow_item_gre),
428                 .convert = mlx5_flow_create_gre,
429                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
430         },
431         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
432                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
433                                RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
434                                RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
435                 .actions = valid_actions,
436                 .mask = &(const struct rte_flow_item_vxlan){
437                         .vni = "\xff\xff\xff",
438                 },
439                 .default_mask = &rte_flow_item_vxlan_mask,
440                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
441                 .convert = mlx5_flow_create_vxlan,
442                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
443         },
444 };
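/*
 * Example of a pattern accepted by the graph above (illustrative only):
 *   ETH -> IPV4 -> UDP -> VXLAN -> ETH -> IPV4 -> TCP -> END
 * Each item must appear in the "items" list of its predecessor, starting from
 * the implicit RTE_FLOW_ITEM_TYPE_END root entry.
 */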
445
446 /** Structure to pass to the conversion function. */
447 struct mlx5_flow_parse {
448         uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
449         uint32_t create:1;
450         /**< Whether resources should remain after a validate. */
451         uint32_t drop:1; /**< Target is a drop queue. */
452         uint32_t mark:1; /**< Mark is present in the flow. */
453         uint32_t count:1; /**< Count is present in the flow. */
454         uint32_t mark_id; /**< Mark identifier. */
455         struct rte_flow_action_rss rss_conf; /**< RSS configuration */
456         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
457         uint8_t rss_key[40]; /**< copy of the RSS key. */
458         enum hash_rxq_type layer; /**< Last pattern layer detected. */
459         enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
460         uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
461         struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
462         struct {
463                 struct ibv_flow_attr *ibv_attr;
464                 /**< Pointer to Verbs attributes. */
465                 unsigned int offset;
466                 /**< Current position or total size of the attribute. */
467         } queue[RTE_DIM(hash_rxq_init)];
468 };
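/*
 * During validation, queue[i].offset accumulates the total size required for
 * the Verbs attribute and its specifications; once the attribute is
 * allocated, the offset is reset and reused as the write cursor by
 * mlx5_flow_create_copy().
 */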
469
470 static const struct rte_flow_ops mlx5_flow_ops = {
471         .validate = mlx5_flow_validate,
472         .create = mlx5_flow_create,
473         .destroy = mlx5_flow_destroy,
474         .flush = mlx5_flow_flush,
475 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
476         .query = mlx5_flow_query,
477 #else
478         .query = NULL,
479 #endif
480         .isolate = mlx5_flow_isolate,
481 };
482
483 /* Convert FDIR request to Generic flow. */
484 struct mlx5_fdir {
485         struct rte_flow_attr attr;
486         struct rte_flow_action actions[2];
487         struct rte_flow_item items[4];
488         struct rte_flow_item_eth l2;
489         struct rte_flow_item_eth l2_mask;
490         union {
491                 struct rte_flow_item_ipv4 ipv4;
492                 struct rte_flow_item_ipv6 ipv6;
493         } l3;
494         union {
495                 struct rte_flow_item_ipv4 ipv4;
496                 struct rte_flow_item_ipv6 ipv6;
497         } l3_mask;
498         union {
499                 struct rte_flow_item_udp udp;
500                 struct rte_flow_item_tcp tcp;
501         } l4;
502         union {
503                 struct rte_flow_item_udp udp;
504                 struct rte_flow_item_tcp tcp;
505         } l4_mask;
506         struct rte_flow_action_queue queue;
507 };
508
509 /* Verbs specification header. */
510 struct ibv_spec_header {
511         enum ibv_flow_spec_type type;
512         uint16_t size;
513 };
514
515 /**
516  * Check support for a given item.
517  *
518  * @param[in] item
519  *   Item specification.
520  * @param[in] mask
521  *   Bit-masks covering supported fields to compare with spec, last and mask in
522  *   \item.
523  * @param size
524  *   Bit-Mask size in bytes.
525  *
526  * @return
527  *   0 on success, a negative errno value otherwise and rte_errno is set.
528  */
529 static int
530 mlx5_flow_item_validate(const struct rte_flow_item *item,
531                         const uint8_t *mask, unsigned int size)
532 {
533         if (!item->spec && (item->mask || item->last)) {
534                 rte_errno = EINVAL;
535                 return -rte_errno;
536         }
537         if (item->spec && !item->mask) {
538                 unsigned int i;
539                 const uint8_t *spec = item->spec;
540
541                 for (i = 0; i < size; ++i)
542                         if ((spec[i] | mask[i]) != mask[i]) {
543                                 rte_errno = EINVAL;
544                                 return -rte_errno;
545                         }
546         }
547         if (item->last && !item->mask) {
548                 unsigned int i;
549                 const uint8_t *spec = item->last;
550
551                 for (i = 0; i < size; ++i)
552                         if ((spec[i] | mask[i]) != mask[i]) {
553                                 rte_errno = EINVAL;
554                                 return -rte_errno;
555                         }
556         }
557         if (item->mask) {
558                 unsigned int i;
559                 const uint8_t *spec = item->mask;
560
561                 for (i = 0; i < size; ++i)
562                         if ((spec[i] | mask[i]) != mask[i]) {
563                                 rte_errno = EINVAL;
564                                 return -rte_errno;
565                         }
566         }
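        /*
         * Range check: when both spec and last are present, the range is only
         * accepted if both bounds are identical once the (user or default)
         * mask is applied, e.g. dst 10.0.0.0..10.0.0.255 under mask
         * 255.255.255.0 (illustrative).
         */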
567         if (item->spec && item->last) {
568                 uint8_t spec[size];
569                 uint8_t last[size];
570                 const uint8_t *apply = mask;
571                 unsigned int i;
572                 int ret;
573
574                 if (item->mask)
575                         apply = item->mask;
576                 for (i = 0; i < size; ++i) {
577                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
578                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
579                 }
580                 ret = memcmp(spec, last, size);
581                 if (ret != 0) {
582                         rte_errno = EINVAL;
583                         return -rte_errno;
584                 }
585         }
586         return 0;
587 }
588
589 /**
590  * Validate flow rule attributes.
591  *
592  * @param[in] attr
593  *   Flow rule attributes.
594  * @param[out] error
595  *   Perform verbose error reporting if not NULL.
596  *
597  * @return
598  *   0 on success, a negative errno value otherwise and rte_errno is set.
599  */
600 static int
601 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
602                              struct rte_flow_error *error)
603 {
604         if (attr->group) {
605                 rte_flow_error_set(error, ENOTSUP,
606                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
607                                    NULL,
608                                    "groups are not supported");
609                 return -rte_errno;
610         }
611         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
612                 rte_flow_error_set(error, ENOTSUP,
613                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
614                                    NULL,
615                                    "priorities are not supported");
616                 return -rte_errno;
617         }
618         if (attr->egress) {
619                 rte_flow_error_set(error, ENOTSUP,
620                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
621                                    NULL,
622                                    "egress is not supported");
623                 return -rte_errno;
624         }
625         if (attr->transfer) {
626                 rte_flow_error_set(error, ENOTSUP,
627                                    RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
628                                    NULL,
629                                    "transfer is not supported");
630                 return -rte_errno;
631         }
632         if (!attr->ingress) {
633                 rte_flow_error_set(error, ENOTSUP,
634                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
635                                    NULL,
636                                    "only ingress is supported");
637                 return -rte_errno;
638         }
639         return 0;
640 }
641
642 /**
643  * Extract the requested actions into the parser.
644  *
645  * @param dev
646  *   Pointer to Ethernet device.
647  * @param[in] actions
648  *   Associated actions (list terminated by the END action).
649  * @param[out] error
650  *   Perform verbose error reporting if not NULL.
651  * @param[in, out] parser
652  *   Internal parser structure.
653  *
654  * @return
655  *   0 on success, a negative errno value otherwise and rte_errno is set.
656  */
657 static int
658 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
659                           const struct rte_flow_action actions[],
660                           struct rte_flow_error *error,
661                           struct mlx5_flow_parse *parser)
662 {
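        /*
         * Actions fall into exclusive classes: FATE (drop/queue/RSS), MARK
         * (mark/flag) and COUNT; at most one action of each class may appear
         * in a single flow rule.
         */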
663         enum { FATE = 1, MARK = 2, COUNT = 4, };
664         uint32_t overlap = 0;
665         struct priv *priv = dev->data->dev_private;
666
667         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
668                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
669                         continue;
670                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
671                         if (overlap & FATE)
672                                 goto exit_action_overlap;
673                         overlap |= FATE;
674                         parser->drop = 1;
675                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
676                         const struct rte_flow_action_queue *queue =
677                                 (const struct rte_flow_action_queue *)
678                                 actions->conf;
679
680                         if (overlap & FATE)
681                                 goto exit_action_overlap;
682                         overlap |= FATE;
683                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
684                                 goto exit_action_not_supported;
685                         parser->queues[0] = queue->index;
686                         parser->rss_conf = (struct rte_flow_action_rss){
687                                 .queue_num = 1,
688                                 .queue = parser->queues,
689                         };
690                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
691                         const struct rte_flow_action_rss *rss =
692                                 (const struct rte_flow_action_rss *)
693                                 actions->conf;
694                         const uint8_t *rss_key;
695                         uint32_t rss_key_len;
696                         uint16_t n;
697
698                         if (overlap & FATE)
699                                 goto exit_action_overlap;
700                         overlap |= FATE;
701                         if (rss->func &&
702                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
703                                 rte_flow_error_set(error, EINVAL,
704                                                    RTE_FLOW_ERROR_TYPE_ACTION,
705                                                    actions,
706                                                    "the only supported RSS hash"
707                                                    " function is Toeplitz");
708                                 return -rte_errno;
709                         }
710                         if (rss->level) {
711                                 rte_flow_error_set(error, EINVAL,
712                                                    RTE_FLOW_ERROR_TYPE_ACTION,
713                                                    actions,
714                                                    "a nonzero RSS encapsulation"
715                                                    " level is not supported");
716                                 return -rte_errno;
717                         }
718                         if (rss->types & MLX5_RSS_HF_MASK) {
719                                 rte_flow_error_set(error, EINVAL,
720                                                    RTE_FLOW_ERROR_TYPE_ACTION,
721                                                    actions,
722                                                    "unsupported RSS type"
723                                                    " requested");
724                                 return -rte_errno;
725                         }
726                         if (rss->key_len) {
727                                 rss_key_len = rss->key_len;
728                                 rss_key = rss->key;
729                         } else {
730                                 rss_key_len = rss_hash_default_key_len;
731                                 rss_key = rss_hash_default_key;
732                         }
733                         if (rss_key_len != RTE_DIM(parser->rss_key)) {
734                                 rte_flow_error_set(error, EINVAL,
735                                                    RTE_FLOW_ERROR_TYPE_ACTION,
736                                                    actions,
737                                                    "RSS hash key must be"
738                                                    " exactly 40 bytes long");
739                                 return -rte_errno;
740                         }
741                         if (!rss->queue_num) {
742                                 rte_flow_error_set(error, EINVAL,
743                                                    RTE_FLOW_ERROR_TYPE_ACTION,
744                                                    actions,
745                                                    "no valid queues");
746                                 return -rte_errno;
747                         }
748                         if (rss->queue_num > RTE_DIM(parser->queues)) {
749                                 rte_flow_error_set(error, EINVAL,
750                                                    RTE_FLOW_ERROR_TYPE_ACTION,
751                                                    actions,
752                                                    "too many queues for RSS"
753                                                    " context");
754                                 return -rte_errno;
755                         }
756                         for (n = 0; n < rss->queue_num; ++n) {
757                                 if (rss->queue[n] >= priv->rxqs_n) {
758                                         rte_flow_error_set(error, EINVAL,
759                                                    RTE_FLOW_ERROR_TYPE_ACTION,
760                                                    actions,
761                                                    "queue id > number of"
762                                                    " queues");
763                                         return -rte_errno;
764                                 }
765                         }
766                         parser->rss_conf = (struct rte_flow_action_rss){
767                                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
768                                 .level = 0,
769                                 .types = rss->types,
770                                 .key_len = rss_key_len,
771                                 .queue_num = rss->queue_num,
772                                 .key = memcpy(parser->rss_key, rss_key,
773                                               sizeof(*rss_key) * rss_key_len),
774                                 .queue = memcpy(parser->queues, rss->queue,
775                                                 sizeof(*rss->queue) *
776                                                 rss->queue_num),
777                         };
778                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
779                         const struct rte_flow_action_mark *mark =
780                                 (const struct rte_flow_action_mark *)
781                                 actions->conf;
782
783                         if (overlap & MARK)
784                                 goto exit_action_overlap;
785                         overlap |= MARK;
786                         if (!mark) {
787                                 rte_flow_error_set(error, EINVAL,
788                                                    RTE_FLOW_ERROR_TYPE_ACTION,
789                                                    actions,
790                                                    "mark must be defined");
791                                 return -rte_errno;
792                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
793                                 rte_flow_error_set(error, ENOTSUP,
794                                                    RTE_FLOW_ERROR_TYPE_ACTION,
795                                                    actions,
796                                                    "mark must be between 0"
797                                                    " and 16777199");
798                                 return -rte_errno;
799                         }
800                         parser->mark = 1;
801                         parser->mark_id = mark->id;
802                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
803                         if (overlap & MARK)
804                                 goto exit_action_overlap;
805                         overlap |= MARK;
806                         parser->mark = 1;
807                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
808                            priv->config.flow_counter_en) {
809                         if (overlap & COUNT)
810                                 goto exit_action_overlap;
811                         overlap |= COUNT;
812                         parser->count = 1;
813                 } else {
814                         goto exit_action_not_supported;
815                 }
816         }
817         /* When fate is unknown, drop traffic. */
818         if (!(overlap & FATE))
819                 parser->drop = 1;
820         if (parser->drop && parser->mark)
821                 parser->mark = 0;
822         if (!parser->rss_conf.queue_num && !parser->drop) {
823                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
824                                    NULL, "no valid action");
825                 return -rte_errno;
826         }
827         return 0;
828 exit_action_not_supported:
829         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
830                            actions, "action not supported");
831         return -rte_errno;
832 exit_action_overlap:
833         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
834                            actions, "overlapping actions are not supported");
835         return -rte_errno;
836 }
837
838 /**
839  * Validate items.
840  *
841  * @param[in] items
842  *   Pattern specification (list terminated by the END pattern item).
843  * @param[out] error
844  *   Perform verbose error reporting if not NULL.
845  * @param[in, out] parser
846  *   Internal parser structure.
847  *
848  * @return
849  *   0 on success, a negative errno value otherwise and rte_errno is set.
850  */
851 static int
852 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
853                                  struct rte_flow_error *error,
854                                  struct mlx5_flow_parse *parser)
855 {
856         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
857         unsigned int i;
858         int ret = 0;
859
860         /* Initialise the offsets to start after verbs attribute. */
861         for (i = 0; i != hash_rxq_init_n; ++i)
862                 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
863         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
864                 const struct mlx5_flow_items *token = NULL;
865                 unsigned int n;
866
867                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
868                         continue;
869                 for (i = 0;
870                      cur_item->items &&
871                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
872                      ++i) {
873                         if (cur_item->items[i] == items->type) {
874                                 token = &mlx5_flow_items[items->type];
875                                 break;
876                         }
877                 }
878                 if (!token) {
879                         ret = -ENOTSUP;
880                         goto exit_item_not_supported;
881                 }
882                 cur_item = token;
883                 ret = mlx5_flow_item_validate(items,
884                                               (const uint8_t *)cur_item->mask,
885                                               cur_item->mask_sz);
886                 if (ret)
887                         goto exit_item_not_supported;
888                 if (IS_TUNNEL(items->type)) {
889                         if (parser->tunnel) {
890                                 rte_flow_error_set(error, ENOTSUP,
891                                                    RTE_FLOW_ERROR_TYPE_ITEM,
892                                                    items,
893                                                    "Cannot recognize multiple"
894                                                    " tunnel encapsulations.");
895                                 return -rte_errno;
896                         }
897                         parser->inner = IBV_FLOW_SPEC_INNER;
898                         parser->tunnel = flow_ptype[items->type];
899                 }
900                 if (parser->drop) {
901                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
902                 } else {
903                         for (n = 0; n != hash_rxq_init_n; ++n)
904                                 parser->queue[n].offset += cur_item->dst_sz;
905                 }
906         }
907         if (parser->drop) {
908                 parser->queue[HASH_RXQ_ETH].offset +=
909                         sizeof(struct ibv_flow_spec_action_drop);
910         }
911         if (parser->mark) {
912                 for (i = 0; i != hash_rxq_init_n; ++i)
913                         parser->queue[i].offset +=
914                                 sizeof(struct ibv_flow_spec_action_tag);
915         }
916         if (parser->count) {
917                 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
918
919                 for (i = 0; i != hash_rxq_init_n; ++i)
920                         parser->queue[i].offset += size;
921         }
922         return 0;
923 exit_item_not_supported:
924         return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
925                                   items, "item not supported");
926 }
927
928 /**
929  * Allocate memory space to store verbs flow attributes.
930  *
931  * @param[in] size
932  *   Amount of bytes to allocate.
933  * @param[out] error
934  *   Perform verbose error reporting if not NULL.
935  *
936  * @return
937  *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
938  */
939 static struct ibv_flow_attr *
940 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
941 {
942         struct ibv_flow_attr *ibv_attr;
943
944         ibv_attr = rte_calloc(__func__, 1, size, 0);
945         if (!ibv_attr) {
946                 rte_flow_error_set(error, ENOMEM,
947                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
948                                    NULL,
949                                    "cannot allocate verbs spec attributes");
950                 return NULL;
951         }
952         return ibv_attr;
953 }
954
955 /**
956  * Give inner packet matching a higher priority than the non-inner
957  * matching.
958  *
959  * @param dev
960  *   Pointer to Ethernet device.
961  * @param[in, out] parser
962  *   Internal parser structure.
963  * @param attr
964  *   User flow attribute.
965  */
966 static void
967 mlx5_flow_update_priority(struct rte_eth_dev *dev,
968                           struct mlx5_flow_parse *parser,
969                           const struct rte_flow_attr *attr)
970 {
971         struct priv *priv = dev->data->dev_private;
972         unsigned int i;
973         uint16_t priority;
974
975         /*                      8 priorities    >= 16 priorities
976          * Control flow:        4-7             8-15
977          * User normal flow:    1-3             4-7
978          * User tunnel flow:    0-2             0-3
979          */
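        /*
         * Example: with 16 Verbs priorities, a non-tunnel user flow created
         * with attr->priority == 0 and matching TCPv4 ends up at Verbs
         * priority 0 * 8 + 4 + hash_rxq_init[HASH_RXQ_TCPV4].flow_priority,
         * i.e. 4.
         */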
980         priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
981         if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
982                 priority /= 2;
983         /*
984          * Demote non-tunnel flows by one Verbs priority level when only 8
985          * Verbs priorities are supported, by four levels otherwise.
986          */
987         if (!parser->inner) {
988                 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
989                         priority += 1;
990                 else
991                         priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
992         }
993         if (parser->drop) {
994                 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
995                                 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
996                 return;
997         }
998         for (i = 0; i != hash_rxq_init_n; ++i) {
999                 if (!parser->queue[i].ibv_attr)
1000                         continue;
1001                 parser->queue[i].ibv_attr->priority = priority +
1002                                 hash_rxq_init[i].flow_priority;
1003         }
1004 }
1005
1006 /**
1007  * Finalise verbs flow attributes.
1008  *
1009  * @param[in, out] parser
1010  *   Internal parser structure.
1011  */
1012 static void
1013 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1014 {
1015         const unsigned int ipv4 =
1016                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
1017         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
1018         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
1019         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
1020         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
1021         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
1022         unsigned int i;
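        /*
         * hmin..hmax spans the hash Rx queue types of the detected L3 family
         * (e.g. TCPv4/UDPv4/IPv4), while ohmin..ohmax spans the opposite
         * family, whose specifications are removed below.
         */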
1023
1024         /* Remove any other flow not matching the pattern. */
1025         if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
1026                 for (i = 0; i != hash_rxq_init_n; ++i) {
1027                         if (i == HASH_RXQ_ETH)
1028                                 continue;
1029                         rte_free(parser->queue[i].ibv_attr);
1030                         parser->queue[i].ibv_attr = NULL;
1031                 }
1032                 return;
1033         }
1034         if (parser->layer == HASH_RXQ_ETH) {
1035                 goto fill;
1036         } else {
1037                 /*
1038                  * This layer becomes useless as the pattern defines more
1039                  * specific layers.
1040                  */
1041                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
1042                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1043         }
1044         /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
1045         for (i = ohmin; i != (ohmax + 1); ++i) {
1046                 if (!parser->queue[i].ibv_attr)
1047                         continue;
1048                 rte_free(parser->queue[i].ibv_attr);
1049                 parser->queue[i].ibv_attr = NULL;
1050         }
1051         /* Remove impossible flow according to the RSS configuration. */
1052         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
1053             parser->rss_conf.types) {
1054                 /* Remove any other flow. */
1055                 for (i = hmin; i != (hmax + 1); ++i) {
1056                         if ((i == parser->layer) ||
1057                              (!parser->queue[i].ibv_attr))
1058                                 continue;
1059                         rte_free(parser->queue[i].ibv_attr);
1060                         parser->queue[i].ibv_attr = NULL;
1061                 }
1062         } else if (!parser->queue[ip].ibv_attr) {
1063                 /* No RSS possible with the current configuration. */
1064                 parser->rss_conf.queue_num = 1;
1065                 return;
1066         }
1067 fill:
1068         /*
1069          * Fill missing layers in verbs specifications, or compute the correct
1070          * offset to allocate the memory space for the attributes and
1071          * specifications.
1072          */
1073         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1074                 union {
1075                         struct ibv_flow_spec_ipv4_ext ipv4;
1076                         struct ibv_flow_spec_ipv6 ipv6;
1077                         struct ibv_flow_spec_tcp_udp udp_tcp;
1078                 } specs;
1079                 void *dst;
1080                 uint16_t size;
1081
1082                 if (i == parser->layer)
1083                         continue;
1084                 if (parser->layer == HASH_RXQ_ETH) {
1085                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1086                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1087                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1088                                         .type = IBV_FLOW_SPEC_IPV4_EXT,
1089                                         .size = size,
1090                                 };
1091                         } else {
1092                                 size = sizeof(struct ibv_flow_spec_ipv6);
1093                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1094                                         .type = IBV_FLOW_SPEC_IPV6,
1095                                         .size = size,
1096                                 };
1097                         }
1098                         if (parser->queue[i].ibv_attr) {
1099                                 dst = (void *)((uintptr_t)
1100                                                parser->queue[i].ibv_attr +
1101                                                parser->queue[i].offset);
1102                                 memcpy(dst, &specs, size);
1103                                 ++parser->queue[i].ibv_attr->num_of_specs;
1104                         }
1105                         parser->queue[i].offset += size;
1106                 }
1107                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1108                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1109                         size = sizeof(struct ibv_flow_spec_tcp_udp);
1110                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1111                                 .type = ((i == HASH_RXQ_UDPV4 ||
1112                                           i == HASH_RXQ_UDPV6) ?
1113                                          IBV_FLOW_SPEC_UDP :
1114                                          IBV_FLOW_SPEC_TCP),
1115                                 .size = size,
1116                         };
1117                         if (parser->queue[i].ibv_attr) {
1118                                 dst = (void *)((uintptr_t)
1119                                                parser->queue[i].ibv_attr +
1120                                                parser->queue[i].offset);
1121                                 memcpy(dst, &specs, size);
1122                                 ++parser->queue[i].ibv_attr->num_of_specs;
1123                         }
1124                         parser->queue[i].offset += size;
1125                 }
1126         }
1127 }
1128
1129 /**
1130  * Validate and convert a flow supported by the NIC.
1131  *
1132  * @param dev
1133  *   Pointer to Ethernet device.
1134  * @param[in] attr
1135  *   Flow rule attributes.
1136  * @param[in] pattern
1137  *   Pattern specification (list terminated by the END pattern item).
1138  * @param[in] actions
1139  *   Associated actions (list terminated by the END action).
1140  * @param[out] error
1141  *   Perform verbose error reporting if not NULL.
1142  * @param[in, out] parser
1143  *   Internal parser structure.
1144  *
1145  * @return
1146  *   0 on success, a negative errno value otherwise and rte_errno is set.
1147  */
1148 static int
1149 mlx5_flow_convert(struct rte_eth_dev *dev,
1150                   const struct rte_flow_attr *attr,
1151                   const struct rte_flow_item items[],
1152                   const struct rte_flow_action actions[],
1153                   struct rte_flow_error *error,
1154                   struct mlx5_flow_parse *parser)
1155 {
1156         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1157         unsigned int i;
1158         int ret;
1159
1160         /* First step. Validate the attributes, items and actions. */
1161         *parser = (struct mlx5_flow_parse){
1162                 .create = parser->create,
1163                 .layer = HASH_RXQ_ETH,
1164                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1165         };
1166         ret = mlx5_flow_convert_attributes(attr, error);
1167         if (ret)
1168                 return ret;
1169         ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1170         if (ret)
1171                 return ret;
1172         ret = mlx5_flow_convert_items_validate(items, error, parser);
1173         if (ret)
1174                 return ret;
1175         mlx5_flow_convert_finalise(parser);
1176         /*
1177          * Second step.
1178          * Allocate the memory space to store verbs specifications.
1179          */
1180         if (parser->drop) {
1181                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1182
1183                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1184                         mlx5_flow_convert_allocate(offset, error);
1185                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1186                         goto exit_enomem;
1187                 parser->queue[HASH_RXQ_ETH].offset =
1188                         sizeof(struct ibv_flow_attr);
1189         } else {
1190                 for (i = 0; i != hash_rxq_init_n; ++i) {
1191                         unsigned int offset;
1192
1193                         if (!(parser->rss_conf.types &
1194                               hash_rxq_init[i].dpdk_rss_hf) &&
1195                             (i != HASH_RXQ_ETH))
1196                                 continue;
1197                         offset = parser->queue[i].offset;
1198                         parser->queue[i].ibv_attr =
1199                                 mlx5_flow_convert_allocate(offset, error);
1200                         if (!parser->queue[i].ibv_attr)
1201                                 goto exit_enomem;
1202                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1203                 }
1204         }
1205         /* Third step. Conversion parse, fill the specifications. */
1206         parser->inner = 0;
1207         parser->tunnel = 0;
1208         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1209                 struct mlx5_flow_data data = {
1210                         .dev = dev,
1211                         .parser = parser,
1212                         .error = error,
1213                 };
1214
1215                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1216                         continue;
1217                 cur_item = &mlx5_flow_items[items->type];
1218                 ret = cur_item->convert(items,
1219                                         (cur_item->default_mask ?
1220                                          cur_item->default_mask :
1221                                          cur_item->mask),
1222                                          &data);
1223                 if (ret)
1224                         goto exit_free;
1225         }
1226         if (parser->mark)
1227                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1228         if (parser->count && parser->create) {
1229                 mlx5_flow_create_count(dev, parser);
1230                 if (!parser->cs)
1231                         goto exit_count_error;
1232         }
1233         /*
1234          * Last step. Complete missing specification to reach the RSS
1235          * configuration.
1236          */
1237         if (!parser->drop)
1238                 mlx5_flow_convert_finalise(parser);
1239         mlx5_flow_update_priority(dev, parser, attr);
1240 exit_free:
1241         /* Only verification is expected, all resources should be released. */
1242         if (!parser->create) {
1243                 for (i = 0; i != hash_rxq_init_n; ++i) {
1244                         if (parser->queue[i].ibv_attr) {
1245                                 rte_free(parser->queue[i].ibv_attr);
1246                                 parser->queue[i].ibv_attr = NULL;
1247                         }
1248                 }
1249         }
1250         return ret;
1251 exit_enomem:
1252         for (i = 0; i != hash_rxq_init_n; ++i) {
1253                 if (parser->queue[i].ibv_attr) {
1254                         rte_free(parser->queue[i].ibv_attr);
1255                         parser->queue[i].ibv_attr = NULL;
1256                 }
1257         }
1258         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1259                            NULL, "cannot allocate verbs spec attributes");
1260         return -rte_errno;
1261 exit_count_error:
1262         rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1263                            NULL, "cannot create counter");
1264         return -rte_errno;
1265 }
1266
1267 /**
1268  * Copy the specification created into the flow.
1269  *
1270  * @param parser
1271  *   Internal parser structure.
1272  * @param src
1273  *   Create specification.
1274  * @param size
1275  *   Size in bytes of the specification to copy.
1276  */
1277 static void
1278 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1279                       unsigned int size)
1280 {
1281         unsigned int i;
1282         void *dst;
1283
1284         for (i = 0; i != hash_rxq_init_n; ++i) {
1285                 if (!parser->queue[i].ibv_attr)
1286                         continue;
1287                 /* Copy only to queues of the same L3 type, or with none. */
1288                 if (parser->layer == HASH_RXQ_ETH ||
1289                     (hash_rxq_init[parser->layer].ip_version ==
1290                      hash_rxq_init[i].ip_version) ||
1291                     (hash_rxq_init[i].ip_version == 0)) {
1292                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1293                                         parser->queue[i].offset);
1294                         memcpy(dst, src, size);
1295                         ++parser->queue[i].ibv_attr->num_of_specs;
1296                         parser->queue[i].offset += size;
1297                 }
1298         }
1299 }
1300
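/*
 * Illustration only (not part of the driver): a converter typically builds
 * a Verbs specification on the stack and hands it to the helper above, for
 * instance:
 *
 *   struct ibv_flow_spec_tcp_udp udp = {
 *           .type = parser->inner | IBV_FLOW_SPEC_UDP,
 *           .size = sizeof(udp),
 *   };
 *   mlx5_flow_create_copy(parser, &udp, sizeof(udp));
 *
 * The specification is appended at queue[i].offset, num_of_specs is
 * incremented and the offset is advanced, keeping the specifications packed
 * right after each struct ibv_flow_attr.
 */
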
1301 /**
1302  * Convert Ethernet item to Verbs specification.
1303  *
1304  * @param item[in]
1305  *   Item specification.
1306  * @param default_mask[in]
1307  *   Default bit-masks to use when item->mask is not provided.
1308  * @param data[in, out]
1309  *   User structure.
1310  *
1311  * @return
1312  *   0 on success, a negative errno value otherwise and rte_errno is set.
1313  */
1314 static int
1315 mlx5_flow_create_eth(const struct rte_flow_item *item,
1316                      const void *default_mask,
1317                      struct mlx5_flow_data *data)
1318 {
1319         const struct rte_flow_item_eth *spec = item->spec;
1320         const struct rte_flow_item_eth *mask = item->mask;
1321         struct mlx5_flow_parse *parser = data->parser;
1322         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1323         struct ibv_flow_spec_eth eth = {
1324                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1325                 .size = eth_size,
1326         };
1327
1328         /* Don't update layer for the inner pattern. */
1329         if (!parser->inner)
1330                 parser->layer = HASH_RXQ_ETH;
1331         if (spec) {
1332                 unsigned int i;
1333
1334                 if (!mask)
1335                         mask = default_mask;
1336                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1337                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1338                 eth.val.ether_type = spec->type;
1339                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1340                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1341                 eth.mask.ether_type = mask->type;
1342                 /* Remove unwanted bits from values. */
1343                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1344                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1345                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1346                 }
1347                 eth.val.ether_type &= eth.mask.ether_type;
1348         }
1349         mlx5_flow_create_copy(parser, &eth, eth_size);
1350         return 0;
1351 }
1352
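/*
 * Illustration only (hypothetical values, not part of the driver): matching
 * a single destination MAC address would reach this converter as:
 *
 *   struct rte_flow_item_eth spec = {
 *           .dst.addr_bytes = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
 *   };
 *   struct rte_flow_item_eth mask = {
 *           .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &spec,
 *           .mask = &mask,
 *   };
 *
 * The value bytes are ANDed with the mask so that only the requested bits
 * end up in the Verbs specification.
 */
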
1353 /**
1354  * Convert VLAN item to Verbs specification.
1355  *
1356  * @param item[in]
1357  *   Item specification.
1358  * @param default_mask[in]
1359  *   Default bit-masks to use when item->mask is not provided.
1360  * @param data[in, out]
1361  *   User structure.
1362  *
1363  * @return
1364  *   0 on success, a negative errno value otherwise and rte_errno is set.
1365  */
1366 static int
1367 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1368                       const void *default_mask,
1369                       struct mlx5_flow_data *data)
1370 {
1371         const struct rte_flow_item_vlan *spec = item->spec;
1372         const struct rte_flow_item_vlan *mask = item->mask;
1373         struct mlx5_flow_parse *parser = data->parser;
1374         struct ibv_flow_spec_eth *eth;
1375         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1376         const char *msg = "VLAN cannot be empty";
1377
1378         if (spec) {
1379                 unsigned int i;
1380                 if (!mask)
1381                         mask = default_mask;
1382
1383                 for (i = 0; i != hash_rxq_init_n; ++i) {
1384                         if (!parser->queue[i].ibv_attr)
1385                                 continue;
1386
1387                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1388                                        parser->queue[i].offset - eth_size);
1389                         eth->val.vlan_tag = spec->tci;
1390                         eth->mask.vlan_tag = mask->tci;
1391                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1392                         /*
1393                          * From the Verbs perspective an empty VLAN is
1394                          * equivalent to a packet without a VLAN layer.
1395                          */
1396                         if (!eth->mask.vlan_tag)
1397                                 goto error;
1398                         /* Outer TPID cannot be matched. */
1399                         if (eth->mask.ether_type) {
1400                                 msg = "VLAN TPID matching is not supported";
1401                                 goto error;
1402                         }
1403                         eth->val.ether_type = spec->inner_type;
1404                         eth->mask.ether_type = mask->inner_type;
1405                         eth->val.ether_type &= eth->mask.ether_type;
1406                 }
1407                 return 0;
1408         }
1409 error:
1410         return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1411                                   item, msg);
1412 }
1413
1414 /**
1415  * Convert IPv4 item to Verbs specification.
1416  *
1417  * @param item[in]
1418  *   Item specification.
1419  * @param default_mask[in]
1420  *   Default bit-masks to use when item->mask is not provided.
1421  * @param data[in, out]
1422  *   User structure.
1423  *
1424  * @return
1425  *   0 on success, a negative errno value otherwise and rte_errno is set.
1426  */
1427 static int
1428 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1429                       const void *default_mask,
1430                       struct mlx5_flow_data *data)
1431 {
1432         struct priv *priv = data->dev->data->dev_private;
1433         const struct rte_flow_item_ipv4 *spec = item->spec;
1434         const struct rte_flow_item_ipv4 *mask = item->mask;
1435         struct mlx5_flow_parse *parser = data->parser;
1436         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1437         struct ibv_flow_spec_ipv4_ext ipv4 = {
1438                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1439                 .size = ipv4_size,
1440         };
1441
1442         if (parser->layer == HASH_RXQ_TUNNEL &&
1443             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1444             !priv->config.l3_vxlan_en)
1445                 return rte_flow_error_set(data->error, EINVAL,
1446                                           RTE_FLOW_ERROR_TYPE_ITEM,
1447                                           item,
1448                                           "L3 VXLAN not enabled by device"
1449                                           " parameter and/or not configured"
1450                                           " in firmware");
1451         /* Don't update layer for the inner pattern. */
1452         if (!parser->inner)
1453                 parser->layer = HASH_RXQ_IPV4;
1454         if (spec) {
1455                 if (!mask)
1456                         mask = default_mask;
1457                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1458                         .src_ip = spec->hdr.src_addr,
1459                         .dst_ip = spec->hdr.dst_addr,
1460                         .proto = spec->hdr.next_proto_id,
1461                         .tos = spec->hdr.type_of_service,
1462                 };
1463                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1464                         .src_ip = mask->hdr.src_addr,
1465                         .dst_ip = mask->hdr.dst_addr,
1466                         .proto = mask->hdr.next_proto_id,
1467                         .tos = mask->hdr.type_of_service,
1468                 };
1469                 /* Remove unwanted bits from values. */
1470                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1471                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1472                 ipv4.val.proto &= ipv4.mask.proto;
1473                 ipv4.val.tos &= ipv4.mask.tos;
1474         }
1475         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1476         return 0;
1477 }
1478
1479 /**
1480  * Convert IPv6 item to Verbs specification.
1481  *
1482  * @param item[in]
1483  *   Item specification.
1484  * @param default_mask[in]
1485  *   Default bit-masks to use when item->mask is not provided.
1486  * @param data[in, out]
1487  *   User structure.
1488  *
1489  * @return
1490  *   0 on success, a negative errno value otherwise and rte_errno is set.
1491  */
1492 static int
1493 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1494                       const void *default_mask,
1495                       struct mlx5_flow_data *data)
1496 {
1497         struct priv *priv = data->dev->data->dev_private;
1498         const struct rte_flow_item_ipv6 *spec = item->spec;
1499         const struct rte_flow_item_ipv6 *mask = item->mask;
1500         struct mlx5_flow_parse *parser = data->parser;
1501         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1502         struct ibv_flow_spec_ipv6 ipv6 = {
1503                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1504                 .size = ipv6_size,
1505         };
1506
1507         if (parser->layer == HASH_RXQ_TUNNEL &&
1508             parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1509             !priv->config.l3_vxlan_en)
1510                 return rte_flow_error_set(data->error, EINVAL,
1511                                           RTE_FLOW_ERROR_TYPE_ITEM,
1512                                           item,
1513                                           "L3 VXLAN not enabled by device"
1514                                           " parameter and/or not configured"
1515                                           " in firmware");
1516         /* Don't update layer for the inner pattern. */
1517         if (!parser->inner)
1518                 parser->layer = HASH_RXQ_IPV6;
1519         if (spec) {
1520                 unsigned int i;
1521                 uint32_t vtc_flow_val;
1522                 uint32_t vtc_flow_mask;
1523
1524                 if (!mask)
1525                         mask = default_mask;
1526                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1527                        RTE_DIM(ipv6.val.src_ip));
1528                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1529                        RTE_DIM(ipv6.val.dst_ip));
1530                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1531                        RTE_DIM(ipv6.mask.src_ip));
1532                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1533                        RTE_DIM(ipv6.mask.dst_ip));
1534                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1535                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1536                 ipv6.val.flow_label =
1537                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1538                                          IPV6_HDR_FL_SHIFT);
1539                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1540                                          IPV6_HDR_TC_SHIFT;
1541                 ipv6.val.next_hdr = spec->hdr.proto;
1542                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1543                 ipv6.mask.flow_label =
1544                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1545                                          IPV6_HDR_FL_SHIFT);
1546                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1547                                           IPV6_HDR_TC_SHIFT;
1548                 ipv6.mask.next_hdr = mask->hdr.proto;
1549                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1550                 /* Remove unwanted bits from values. */
1551                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1552                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1553                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1554                 }
1555                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1556                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1557                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1558                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1559         }
1560         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1561         return 0;
1562 }
1563
1564 /**
1565  * Convert UDP item to Verbs specification.
1566  *
1567  * @param item[in]
1568  *   Item specification.
1569  * @param default_mask[in]
1570  *   Default bit-masks to use when item->mask is not provided.
1571  * @param data[in, out]
1572  *   User structure.
1573  *
1574  * @return
1575  *   0 on success, a negative errno value otherwise and rte_errno is set.
1576  */
1577 static int
1578 mlx5_flow_create_udp(const struct rte_flow_item *item,
1579                      const void *default_mask,
1580                      struct mlx5_flow_data *data)
1581 {
1582         const struct rte_flow_item_udp *spec = item->spec;
1583         const struct rte_flow_item_udp *mask = item->mask;
1584         struct mlx5_flow_parse *parser = data->parser;
1585         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1586         struct ibv_flow_spec_tcp_udp udp = {
1587                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1588                 .size = udp_size,
1589         };
1590
1591         /* Don't update layer for the inner pattern. */
1592         if (!parser->inner) {
1593                 if (parser->layer == HASH_RXQ_IPV4)
1594                         parser->layer = HASH_RXQ_UDPV4;
1595                 else
1596                         parser->layer = HASH_RXQ_UDPV6;
1597         }
1598         if (spec) {
1599                 if (!mask)
1600                         mask = default_mask;
1601                 udp.val.dst_port = spec->hdr.dst_port;
1602                 udp.val.src_port = spec->hdr.src_port;
1603                 udp.mask.dst_port = mask->hdr.dst_port;
1604                 udp.mask.src_port = mask->hdr.src_port;
1605                 /* Remove unwanted bits from values. */
1606                 udp.val.src_port &= udp.mask.src_port;
1607                 udp.val.dst_port &= udp.mask.dst_port;
1608         }
1609         mlx5_flow_create_copy(parser, &udp, udp_size);
1610         return 0;
1611 }
1612
1613 /**
1614  * Convert TCP item to Verbs specification.
1615  *
1616  * @param item[in]
1617  *   Item specification.
1618  * @param default_mask[in]
1619  *   Default bit-masks to use when item->mask is not provided.
1620  * @param data[in, out]
1621  *   User structure.
1622  *
1623  * @return
1624  *   0 on success, a negative errno value otherwise and rte_errno is set.
1625  */
1626 static int
1627 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1628                      const void *default_mask,
1629                      struct mlx5_flow_data *data)
1630 {
1631         const struct rte_flow_item_tcp *spec = item->spec;
1632         const struct rte_flow_item_tcp *mask = item->mask;
1633         struct mlx5_flow_parse *parser = data->parser;
1634         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1635         struct ibv_flow_spec_tcp_udp tcp = {
1636                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1637                 .size = tcp_size,
1638         };
1639
1640         /* Don't update layer for the inner pattern. */
1641         if (!parser->inner) {
1642                 if (parser->layer == HASH_RXQ_IPV4)
1643                         parser->layer = HASH_RXQ_TCPV4;
1644                 else
1645                         parser->layer = HASH_RXQ_TCPV6;
1646         }
1647         if (spec) {
1648                 if (!mask)
1649                         mask = default_mask;
1650                 tcp.val.dst_port = spec->hdr.dst_port;
1651                 tcp.val.src_port = spec->hdr.src_port;
1652                 tcp.mask.dst_port = mask->hdr.dst_port;
1653                 tcp.mask.src_port = mask->hdr.src_port;
1654                 /* Remove unwanted bits from values. */
1655                 tcp.val.src_port &= tcp.mask.src_port;
1656                 tcp.val.dst_port &= tcp.mask.dst_port;
1657         }
1658         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1659         return 0;
1660 }
1661
1662 /**
1663  * Convert VXLAN item to Verbs specification.
1664  *
1665  * @param item[in]
1666  *   Item specification.
1667  * @param default_mask[in]
1668  *   Default bit-masks to use when item->mask is not provided.
1669  * @param data[in, out]
1670  *   User structure.
1671  *
1672  * @return
1673  *   0 on success, a negative errno value otherwise and rte_errno is set.
1674  */
1675 static int
1676 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1677                        const void *default_mask,
1678                        struct mlx5_flow_data *data)
1679 {
1680         const struct rte_flow_item_vxlan *spec = item->spec;
1681         const struct rte_flow_item_vxlan *mask = item->mask;
1682         struct mlx5_flow_parse *parser = data->parser;
1683         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1684         struct ibv_flow_spec_tunnel vxlan = {
1685                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1686                 .size = size,
1687         };
1688         union vni {
1689                 uint32_t vlan_id;
1690                 uint8_t vni[4];
1691         } id;
1692
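        /*
         * The 24-bit VNI from the item is copied into bytes 1-3 of the
         * union; byte 0 is cleared first so that the full 32-bit value
         * used as the Verbs tunnel_id is well defined.
         */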
1693         id.vni[0] = 0;
1694         parser->inner = IBV_FLOW_SPEC_INNER;
1695         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1696         if (spec) {
1697                 if (!mask)
1698                         mask = default_mask;
1699                 memcpy(&id.vni[1], spec->vni, 3);
1700                 vxlan.val.tunnel_id = id.vlan_id;
1701                 memcpy(&id.vni[1], mask->vni, 3);
1702                 vxlan.mask.tunnel_id = id.vlan_id;
1703                 /* Remove unwanted bits from values. */
1704                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1705         }
1706         /*
1707          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
1708          * the only layer in the Verbs specification, it is interpreted as a
1709          * wildcard and every packet matches the rule; if it follows a full
1710          * stack of layers (e.g. eth / ipv4 / udp), every packet matching
1711          * those layers also matches this rule.
1712          * To avoid such a situation, VNI 0 is currently refused.
1713          */
1714         if (!vxlan.val.tunnel_id)
1715                 return rte_flow_error_set(data->error, EINVAL,
1716                                           RTE_FLOW_ERROR_TYPE_ITEM,
1717                                           item,
1718                                           "VxLAN vni cannot be 0");
1719         mlx5_flow_create_copy(parser, &vxlan, size);
1720         return 0;
1721 }
1722
1723 /**
1724  * Convert GRE item to Verbs specification.
1725  *
1726  * @param item[in]
1727  *   Item specification.
1728  * @param default_mask[in]
1729  *   Default bit-masks to use when item->mask is not provided.
1730  * @param data[in, out]
1731  *   User structure.
1732  *
1733  * @return
1734  *   0 on success, a negative errno value otherwise and rte_errno is set.
1735  */
1736 static int
1737 mlx5_flow_create_gre(const struct rte_flow_item *item,
1738                      const void *default_mask __rte_unused,
1739                      struct mlx5_flow_data *data)
1740 {
1741         struct mlx5_flow_parse *parser = data->parser;
1742         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1743         struct ibv_flow_spec_tunnel tunnel = {
1744                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1745                 .size = size,
1746         };
1747         struct ibv_flow_spec_ipv4_ext *ipv4;
1748         struct ibv_flow_spec_ipv6 *ipv6;
1749         unsigned int i;
1750
1751         parser->inner = IBV_FLOW_SPEC_INNER;
1752         parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1753         /* Update the encapsulating IP layer protocol. */
1754         for (i = 0; i != hash_rxq_init_n; ++i) {
1755                 if (!parser->queue[i].ibv_attr)
1756                         continue;
1757                 if (parser->out_layer == HASH_RXQ_IPV4) {
1758                         ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1759                                 parser->queue[i].offset -
1760                                 sizeof(struct ibv_flow_spec_ipv4_ext));
1761                         if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1762                                 break;
1763                         ipv4->val.proto = MLX5_GRE;
1764                         ipv4->mask.proto = 0xff;
1765                 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1766                         ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1767                                 parser->queue[i].offset -
1768                                 sizeof(struct ibv_flow_spec_ipv6));
1769                         if (ipv6->mask.next_hdr &&
1770                             ipv6->val.next_hdr != MLX5_GRE)
1771                                 break;
1772                         ipv6->val.next_hdr = MLX5_GRE;
1773                         ipv6->mask.next_hdr = 0xff;
1774                 }
1775         }
1776         if (i != hash_rxq_init_n)
1777                 return rte_flow_error_set(data->error, EINVAL,
1778                                           RTE_FLOW_ERROR_TYPE_ITEM,
1779                                           item,
1780                                           "IP protocol of GRE must be 47");
1781         mlx5_flow_create_copy(parser, &tunnel, size);
1782         return 0;
1783 }
1784
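/*
 * Illustration only (hypothetical testpmd-style rule, not part of the
 * driver):
 *
 *   flow create 0 ingress pattern eth / ipv4 / gre / end
 *        actions queue index 0 / end
 *
 * The converter above forces the outer IP protocol to 47 (GRE) when it is
 * left unspecified and rejects any other explicit value.
 */
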
1785 /**
1786  * Convert mark/flag action to Verbs specification.
1787  *
1788  * @param parser
1789  *   Internal parser structure.
1790  * @param mark_id
1791  *   Mark identifier.
1792  *
1793  * @return
1794  *   0 on success, a negative errno value otherwise and rte_errno is set.
1795  */
1796 static int
1797 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1798 {
1799         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1800         struct ibv_flow_spec_action_tag tag = {
1801                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1802                 .size = size,
1803                 .tag_id = mlx5_flow_mark_set(mark_id),
1804         };
1805
1806         assert(parser->mark);
1807         mlx5_flow_create_copy(parser, &tag, size);
1808         return 0;
1809 }
1810
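/*
 * Illustration only (hypothetical values, not part of the driver): this
 * helper is reached from MARK/FLAG actions such as:
 *
 *   struct rte_flow_action_mark mark = { .id = 42 };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * Matching packets are then expected to carry the identifier in
 * mbuf->hash.fdir.hi with the PKT_RX_FDIR_ID flag set.
 */
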
1811 /**
1812  * Convert count action to Verbs specification.
1813  *
1814  * @param dev
1815  *   Pointer to Ethernet device.
1816  * @param parser
1817  *   Pointer to MLX5 flow parser structure.
1818  *
1819  * @return
1820  *   0 on success, a negative errno value otherwise and rte_errno is set.
1821  */
1822 static int
1823 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1824                        struct mlx5_flow_parse *parser __rte_unused)
1825 {
1826 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1827         struct priv *priv = dev->data->dev_private;
1828         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1829         struct ibv_counter_set_init_attr init_attr = {0};
1830         struct ibv_flow_spec_counter_action counter = {
1831                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1832                 .size = size,
1833                 .counter_set_handle = 0,
1834         };
1835
1836         init_attr.counter_set_id = 0;
1837         parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1838         if (!parser->cs) {
1839                 rte_errno = EINVAL;
1840                 return -rte_errno;
1841         }
1842         counter.counter_set_handle = parser->cs->handle;
1843         mlx5_flow_create_copy(parser, &counter, size);
1844 #endif
1845         return 0;
1846 }
1847
1848 /**
1849  * Complete flow rule creation with a drop queue.
1850  *
1851  * @param dev
1852  *   Pointer to Ethernet device.
1853  * @param parser
1854  *   Internal parser structure.
1855  * @param flow
1856  *   Pointer to the rte_flow.
1857  * @param[out] error
1858  *   Perform verbose error reporting if not NULL.
1859  *
1860  * @return
1861  *   0 on success, a negative errno value otherwise and rte_errno is set.
1862  */
1863 static int
1864 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1865                                    struct mlx5_flow_parse *parser,
1866                                    struct rte_flow *flow,
1867                                    struct rte_flow_error *error)
1868 {
1869         struct priv *priv = dev->data->dev_private;
1870         struct ibv_flow_spec_action_drop *drop;
1871         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1872
1873         assert(priv->pd);
1874         assert(priv->ctx);
1875         flow->drop = 1;
1876         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1877                         parser->queue[HASH_RXQ_ETH].offset);
1878         *drop = (struct ibv_flow_spec_action_drop){
1879                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1880                         .size = size,
1881         };
1882         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1883         parser->queue[HASH_RXQ_ETH].offset += size;
1884         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1885                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1886         if (parser->count)
1887                 flow->cs = parser->cs;
1888         if (!priv->dev->data->dev_started)
1889                 return 0;
1890         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1891         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1892                 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1893                                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
1894         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1895                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1896                                    NULL, "flow rule creation failure");
1897                 goto error;
1898         }
1899         return 0;
1900 error:
1901         assert(flow);
1902         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1903                 claim_zero(mlx5_glue->destroy_flow
1904                            (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1905                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1906         }
1907         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1908                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1909                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1910         }
1911         if (flow->cs) {
1912                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1913                 flow->cs = NULL;
1914                 parser->cs = NULL;
1915         }
1916         return -rte_errno;
1917 }
1918
1919 /**
1920  * Create hash Rx queues when RSS is enabled.
1921  *
1922  * @param dev
1923  *   Pointer to Ethernet device.
1924  * @param parser
1925  *   Internal parser structure.
1926  * @param flow
1927  *   Pointer to the rte_flow.
1928  * @param[out] error
1929  *   Perform verbose error reporting if not NULL.
1930  *
1931  * @return
1932  *   0 on success, a negative errno value otherwise and rte_errno is set.
1933  */
1934 static int
1935 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1936                                   struct mlx5_flow_parse *parser,
1937                                   struct rte_flow *flow,
1938                                   struct rte_flow_error *error)
1939 {
1940         struct priv *priv = dev->data->dev_private;
1941         unsigned int i;
1942
1943         for (i = 0; i != hash_rxq_init_n; ++i) {
1944                 uint64_t hash_fields;
1945
1946                 if (!parser->queue[i].ibv_attr)
1947                         continue;
1948                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1949                 parser->queue[i].ibv_attr = NULL;
1950                 hash_fields = hash_rxq_init[i].hash_fields;
1951                 if (!priv->dev->data->dev_started)
1952                         continue;
1953                 flow->frxq[i].hrxq =
1954                         mlx5_hrxq_get(dev,
1955                                       parser->rss_conf.key,
1956                                       parser->rss_conf.key_len,
1957                                       hash_fields,
1958                                       parser->rss_conf.queue,
1959                                       parser->rss_conf.queue_num,
1960                                       parser->tunnel);
1961                 if (flow->frxq[i].hrxq)
1962                         continue;
1963                 flow->frxq[i].hrxq =
1964                         mlx5_hrxq_new(dev,
1965                                       parser->rss_conf.key,
1966                                       parser->rss_conf.key_len,
1967                                       hash_fields,
1968                                       parser->rss_conf.queue,
1969                                       parser->rss_conf.queue_num,
1970                                       parser->tunnel);
1971                 if (!flow->frxq[i].hrxq) {
1972                         return rte_flow_error_set(error, ENOMEM,
1973                                                   RTE_FLOW_ERROR_TYPE_HANDLE,
1974                                                   NULL,
1975                                                   "cannot create hash rxq");
1976                 }
1977         }
1978         return 0;
1979 }
1980
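/*
 * Note: mlx5_hrxq_get() looks up an existing hash Rx queue matching the RSS
 * key, hash fields and queue list and takes a reference on it; a new one is
 * created with mlx5_hrxq_new() only when no match exists.
 */
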
1981 /**
1982  * RXQ update after flow rule creation.
1983  *
1984  * @param dev
1985  *   Pointer to Ethernet device.
1986  * @param flow
1987  *   Pointer to the flow rule.
1988  */
1989 static void
1990 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
1991 {
1992         struct priv *priv = dev->data->dev_private;
1993         unsigned int i;
1994         unsigned int j;
1995
1996         if (!dev->data->dev_started)
1997                 return;
1998         for (i = 0; i != flow->rss_conf.queue_num; ++i) {
1999                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2000                                                  [(*flow->queues)[i]];
2001                 struct mlx5_rxq_ctrl *rxq_ctrl =
2002                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2003                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2004
2005                 rxq_data->mark |= flow->mark;
2006                 if (!tunnel)
2007                         continue;
2008                 rxq_ctrl->tunnel_types[tunnel] += 1;
2009                 /* Clear the tunnel type if more than one tunnel type is set. */
2010                 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2011                         if (j == tunnel)
2012                                 continue;
2013                         if (rxq_ctrl->tunnel_types[j] > 0) {
2014                                 rxq_data->tunnel = 0;
2015                                 break;
2016                         }
2017                 }
2018                 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2019                         rxq_data->tunnel = flow->tunnel;
2020         }
2021 }
2022
2023 /**
2024  * Complete flow rule creation.
2025  *
2026  * @param dev
2027  *   Pointer to Ethernet device.
2028  * @param parser
2029  *   Internal parser structure.
2030  * @param flow
2031  *   Pointer to the rte_flow.
2032  * @param[out] error
2033  *   Perform verbose error reporting if not NULL.
2034  *
2035  * @return
2036  *   0 on success, a negative errno value otherwise and rte_errno is set.
2037  */
2038 static int
2039 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2040                               struct mlx5_flow_parse *parser,
2041                               struct rte_flow *flow,
2042                               struct rte_flow_error *error)
2043 {
2044         struct priv *priv = dev->data->dev_private;
2045         int ret;
2046         unsigned int i;
2047         unsigned int flows_n = 0;
2048
2049         assert(priv->pd);
2050         assert(priv->ctx);
2051         assert(!parser->drop);
2052         ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2053         if (ret)
2054                 goto error;
2055         if (parser->count)
2056                 flow->cs = parser->cs;
2057         if (!priv->dev->data->dev_started)
2058                 return 0;
2059         for (i = 0; i != hash_rxq_init_n; ++i) {
2060                 if (!flow->frxq[i].hrxq)
2061                         continue;
2062                 flow->frxq[i].ibv_flow =
2063                         mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2064                                                flow->frxq[i].ibv_attr);
2065                 if (!flow->frxq[i].ibv_flow) {
2066                         rte_flow_error_set(error, ENOMEM,
2067                                            RTE_FLOW_ERROR_TYPE_HANDLE,
2068                                            NULL, "flow rule creation failure");
2069                         goto error;
2070                 }
2071                 ++flows_n;
2072                 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
2073                         dev->data->port_id,
2074                         (void *)flow, i,
2075                         (void *)flow->frxq[i].hrxq,
2076                         (void *)flow->frxq[i].ibv_flow);
2077         }
2078         if (!flows_n) {
2079                 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2080                                    NULL, "internal error in flow creation");
2081                 goto error;
2082         }
2083         mlx5_flow_create_update_rxqs(dev, flow);
2084         return 0;
2085 error:
2086         ret = rte_errno; /* Save rte_errno before cleanup. */
2087         assert(flow);
2088         for (i = 0; i != hash_rxq_init_n; ++i) {
2089                 if (flow->frxq[i].ibv_flow) {
2090                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2091
2092                         claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2093                 }
2094                 if (flow->frxq[i].hrxq)
2095                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2096                 if (flow->frxq[i].ibv_attr)
2097                         rte_free(flow->frxq[i].ibv_attr);
2098         }
2099         if (flow->cs) {
2100                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2101                 flow->cs = NULL;
2102                 parser->cs = NULL;
2103         }
2104         rte_errno = ret; /* Restore rte_errno. */
2105         return -rte_errno;
2106 }
2107
2108 /**
2109  * Create a flow and add it to the flow list.
2110  *
2111  * @param dev
2112  *   Pointer to Ethernet device.
2113  * @param list
2114  *   Pointer to a TAILQ flow list.
2115  * @param[in] attr
2116  *   Flow rule attributes.
2117  * @param[in] pattern
2118  *   Pattern specification (list terminated by the END pattern item).
2119  * @param[in] actions
2120  *   Associated actions (list terminated by the END action).
2121  * @param[out] error
2122  *   Perform verbose error reporting if not NULL.
2123  *
2124  * @return
2125  *   A flow on success, NULL otherwise and rte_errno is set.
2126  */
2127 static struct rte_flow *
2128 mlx5_flow_list_create(struct rte_eth_dev *dev,
2129                       struct mlx5_flows *list,
2130                       const struct rte_flow_attr *attr,
2131                       const struct rte_flow_item items[],
2132                       const struct rte_flow_action actions[],
2133                       struct rte_flow_error *error)
2134 {
2135         struct mlx5_flow_parse parser = { .create = 1, };
2136         struct rte_flow *flow = NULL;
2137         unsigned int i;
2138         int ret;
2139
2140         ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2141         if (ret)
2142                 goto exit;
2143         flow = rte_calloc(__func__, 1,
2144                           sizeof(*flow) +
2145                           parser.rss_conf.queue_num * sizeof(uint16_t),
2146                           0);
2147         if (!flow) {
2148                 rte_flow_error_set(error, ENOMEM,
2149                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2150                                    NULL,
2151                                    "cannot allocate flow memory");
2152                 return NULL;
2153         }
2154         /* Copy configuration. */
2155         flow->queues = (uint16_t (*)[])(flow + 1);
2156         flow->tunnel = parser.tunnel;
2157         flow->rss_conf = (struct rte_flow_action_rss){
2158                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2159                 .level = 0,
2160                 .types = parser.rss_conf.types,
2161                 .key_len = parser.rss_conf.key_len,
2162                 .queue_num = parser.rss_conf.queue_num,
2163                 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2164                               sizeof(*parser.rss_conf.key) *
2165                               parser.rss_conf.key_len),
2166                 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2167                                 sizeof(*parser.rss_conf.queue) *
2168                                 parser.rss_conf.queue_num),
2169         };
2170         flow->mark = parser.mark;
2171         /* Finalise the flow. */
2172         if (parser.drop)
2173                 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2174                                                          error);
2175         else
2176                 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2177         if (ret)
2178                 goto exit;
2179         TAILQ_INSERT_TAIL(list, flow, next);
2180         DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2181                 (void *)flow);
2182         return flow;
2183 exit:
2184         DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2185                 error->message);
2186         for (i = 0; i != hash_rxq_init_n; ++i) {
2187                 if (parser.queue[i].ibv_attr)
2188                         rte_free(parser.queue[i].ibv_attr);
2189         }
2190         rte_free(flow);
2191         return NULL;
2192 }
2193
2194 /**
2195  * Validate a flow supported by the NIC.
2196  *
2197  * @see rte_flow_validate()
2198  * @see rte_flow_ops
2199  */
2200 int
2201 mlx5_flow_validate(struct rte_eth_dev *dev,
2202                    const struct rte_flow_attr *attr,
2203                    const struct rte_flow_item items[],
2204                    const struct rte_flow_action actions[],
2205                    struct rte_flow_error *error)
2206 {
2207         struct mlx5_flow_parse parser = { .create = 0, };
2208
2209         return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2210 }
2211
2212 /**
2213  * Create a flow.
2214  *
2215  * @see rte_flow_create()
2216  * @see rte_flow_ops
2217  */
2218 struct rte_flow *
2219 mlx5_flow_create(struct rte_eth_dev *dev,
2220                  const struct rte_flow_attr *attr,
2221                  const struct rte_flow_item items[],
2222                  const struct rte_flow_action actions[],
2223                  struct rte_flow_error *error)
2224 {
2225         struct priv *priv = dev->data->dev_private;
2226
2227         return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2228                                      error);
2229 }
2230
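/*
 * Illustration only (application side, not part of the driver): a minimal
 * rule steering TCP over IPv4 traffic to queue 0, assuming port_id refers
 * to an mlx5 port:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_TCP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                        actions, &err);
 *
 * rte_flow_create() dispatches to mlx5_flow_create() through rte_flow_ops.
 */
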
2231 /**
2232  * Destroy a flow in a list.
2233  *
2234  * @param dev
2235  *   Pointer to Ethernet device.
2236  * @param list
2237  *   Pointer to a TAILQ flow list.
2238  * @param[in] flow
2239  *   Flow to destroy.
2240  */
2241 static void
2242 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2243                        struct rte_flow *flow)
2244 {
2245         struct priv *priv = dev->data->dev_private;
2246         unsigned int i;
2247
2248         if (flow->drop || !dev->data->dev_started)
2249                 goto free;
2250         for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2251                 /* Update queue tunnel type. */
2252                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2253                                                  [(*flow->queues)[i]];
2254                 struct mlx5_rxq_ctrl *rxq_ctrl =
2255                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2256                 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2257
2258                 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2259                 rxq_ctrl->tunnel_types[tunnel] -= 1;
2260                 if (!rxq_ctrl->tunnel_types[tunnel]) {
2261                         /* Update tunnel type. */
2262                         uint8_t j;
2263                         uint8_t types = 0;
2264                         uint8_t last;
2265
2266                         for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2267                                 if (rxq_ctrl->tunnel_types[j]) {
2268                                         types += 1;
2269                                         last = j;
2270                                 }
2271                         /* Keep it unchanged if more than one tunnel type is left. */
2272                         if (types == 1)
2273                                 rxq_data->tunnel = ptype_ext[last];
2274                         else if (types == 0)
2275                                 /* No tunnel type left. */
2276                                 rxq_data->tunnel = 0;
2277                 }
2278         }
2279         for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2280                 struct rte_flow *tmp;
2281                 int mark = 0;
2282
2283                 /*
2284                  * To remove the mark from the queue, the queue must not be
2285                  * present in any other marked flow (RSS or not).
2286                  */
2287                 TAILQ_FOREACH(tmp, list, next) {
2288                         unsigned int j;
2289                         uint16_t *tqs = NULL;
2290                         uint16_t tq_n = 0;
2291
2292                         if (!tmp->mark)
2293                                 continue;
2294                         for (j = 0; j != hash_rxq_init_n; ++j) {
2295                                 if (!tmp->frxq[j].hrxq)
2296                                         continue;
2297                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2298                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2299                         }
2300                         if (!tq_n)
2301                                 continue;
2302                         for (j = 0; (j != tq_n) && !mark; j++)
2303                                 if (tqs[j] == (*flow->queues)[i])
2304                                         mark = 1;
2305                 }
2306                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2307         }
2308 free:
2309         if (flow->drop) {
2310                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2311                         claim_zero(mlx5_glue->destroy_flow
2312                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2313                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2314         } else {
2315                 for (i = 0; i != hash_rxq_init_n; ++i) {
2316                         struct mlx5_flow *frxq = &flow->frxq[i];
2317
2318                         if (frxq->ibv_flow)
2319                                 claim_zero(mlx5_glue->destroy_flow
2320                                            (frxq->ibv_flow));
2321                         if (frxq->hrxq)
2322                                 mlx5_hrxq_release(dev, frxq->hrxq);
2323                         if (frxq->ibv_attr)
2324                                 rte_free(frxq->ibv_attr);
2325                 }
2326         }
2327         if (flow->cs) {
2328                 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2329                 flow->cs = NULL;
2330         }
2331         TAILQ_REMOVE(list, flow, next);
2332         DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2333                 (void *)flow);
2334         rte_free(flow);
2335 }
2336
2337 /**
2338  * Destroy all flows.
2339  *
2340  * @param dev
2341  *   Pointer to Ethernet device.
2342  * @param list
2343  *   Pointer to a TAILQ flow list.
2344  */
2345 void
2346 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2347 {
2348         while (!TAILQ_EMPTY(list)) {
2349                 struct rte_flow *flow;
2350
2351                 flow = TAILQ_FIRST(list);
2352                 mlx5_flow_list_destroy(dev, list, flow);
2353         }
2354 }
2355
2356 /**
2357  * Create drop queue.
2358  *
2359  * @param dev
2360  *   Pointer to Ethernet device.
2361  *
2362  * @return
2363  *   0 on success, a negative errno value otherwise and rte_errno is set.
2364  */
2365 int
2366 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2367 {
2368         struct priv *priv = dev->data->dev_private;
2369         struct mlx5_hrxq_drop *fdq = NULL;
2370
2371         assert(priv->pd);
2372         assert(priv->ctx);
2373         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2374         if (!fdq) {
2375                 DRV_LOG(WARNING,
2376                         "port %u cannot allocate memory for drop queue",
2377                         dev->data->port_id);
2378                 rte_errno = ENOMEM;
2379                 return -rte_errno;
2380         }
2381         fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2382         if (!fdq->cq) {
2383                 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2384                         dev->data->port_id);
2385                 rte_errno = errno;
2386                 goto error;
2387         }
2388         fdq->wq = mlx5_glue->create_wq
2389                 (priv->ctx,
2390                  &(struct ibv_wq_init_attr){
2391                         .wq_type = IBV_WQT_RQ,
2392                         .max_wr = 1,
2393                         .max_sge = 1,
2394                         .pd = priv->pd,
2395                         .cq = fdq->cq,
2396                  });
2397         if (!fdq->wq) {
2398                 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2399                         dev->data->port_id);
2400                 rte_errno = errno;
2401                 goto error;
2402         }
2403         fdq->ind_table = mlx5_glue->create_rwq_ind_table
2404                 (priv->ctx,
2405                  &(struct ibv_rwq_ind_table_init_attr){
2406                         .log_ind_tbl_size = 0,
2407                         .ind_tbl = &fdq->wq,
2408                         .comp_mask = 0,
2409                  });
2410         if (!fdq->ind_table) {
2411                 DRV_LOG(WARNING,
2412                         "port %u cannot allocate indirection table for drop"
2413                         " queue",
2414                         dev->data->port_id);
2415                 rte_errno = errno;
2416                 goto error;
2417         }
2418         fdq->qp = mlx5_glue->create_qp_ex
2419                 (priv->ctx,
2420                  &(struct ibv_qp_init_attr_ex){
2421                         .qp_type = IBV_QPT_RAW_PACKET,
2422                         .comp_mask =
2423                                 IBV_QP_INIT_ATTR_PD |
2424                                 IBV_QP_INIT_ATTR_IND_TABLE |
2425                                 IBV_QP_INIT_ATTR_RX_HASH,
2426                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2427                                 .rx_hash_function =
2428                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2429                                 .rx_hash_key_len = rss_hash_default_key_len,
2430                                 .rx_hash_key = rss_hash_default_key,
2431                                 .rx_hash_fields_mask = 0,
2432                                 },
2433                         .rwq_ind_tbl = fdq->ind_table,
2434                         .pd = priv->pd
2435                  });
2436         if (!fdq->qp) {
2437                 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2438                         dev->data->port_id);
2439                 rte_errno = errno;
2440                 goto error;
2441         }
2442         priv->flow_drop_queue = fdq;
2443         return 0;
2444 error:
2445         if (fdq->qp)
2446                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2447         if (fdq->ind_table)
2448                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2449         if (fdq->wq)
2450                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2451         if (fdq->cq)
2452                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2453         if (fdq)
2454                 rte_free(fdq);
2455         priv->flow_drop_queue = NULL;
2456         return -rte_errno;
2457 }
2458
2459 /**
2460  * Delete drop queue.
2461  *
2462  * @param dev
2463  *   Pointer to Ethernet device.
2464  */
2465 void
2466 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2467 {
2468         struct priv *priv = dev->data->dev_private;
2469         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2470
2471         if (!fdq)
2472                 return;
2473         if (fdq->qp)
2474                 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2475         if (fdq->ind_table)
2476                 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2477         if (fdq->wq)
2478                 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2479         if (fdq->cq)
2480                 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2481         rte_free(fdq);
2482         priv->flow_drop_queue = NULL;
2483 }
2484
2485 /**
2486  * Remove all flows from the NIC, keeping them in the list.
2487  *
2488  * @param dev
2489  *   Pointer to Ethernet device.
2490  * @param list
2491  *   Pointer to a TAILQ flow list.
2492  */
2493 void
2494 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2495 {
2496         struct priv *priv = dev->data->dev_private;
2497         struct rte_flow *flow;
2498         unsigned int i;
2499
2500         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2501                 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2502
2503                 if (flow->drop) {
2504                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2505                                 continue;
2506                         claim_zero(mlx5_glue->destroy_flow
2507                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2508                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2509                         DRV_LOG(DEBUG, "port %u flow %p removed",
2510                                 dev->data->port_id, (void *)flow);
2511                         /* Next flow. */
2512                         continue;
2513                 }
2514                 /* Verify the flow has not already been cleaned. */
2515                 for (i = 0; i != hash_rxq_init_n; ++i) {
2516                         if (!flow->frxq[i].ibv_flow)
2517                                 continue;
2518                         /*
2519                          * The indirection table may be needed to clear the
2520                          * mark flags in the Rx queues.
2521                          * Keeping a reference here avoids another loop and
2522                          * speeds up the process.
2523                          */
2524                         ind_tbl = flow->frxq[i].hrxq->ind_table;
2525                         break;
2526                 }
2527                 if (i == hash_rxq_init_n)
2528                         return;
2529                 if (flow->mark) {
2530                         assert(ind_tbl);
2531                         for (i = 0; i != ind_tbl->queues_n; ++i)
2532                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2533                 }
2534                 for (i = 0; i != hash_rxq_init_n; ++i) {
2535                         if (!flow->frxq[i].ibv_flow)
2536                                 continue;
2537                         claim_zero(mlx5_glue->destroy_flow
2538                                    (flow->frxq[i].ibv_flow));
2539                         flow->frxq[i].ibv_flow = NULL;
2540                         mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2541                         flow->frxq[i].hrxq = NULL;
2542                 }
2543                 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2544                         (void *)flow);
2545         }
2546         /* Clean up Rx queue tunnel info. */
2547         for (i = 0; i != priv->rxqs_n; ++i) {
2548                 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2549                 struct mlx5_rxq_ctrl *rxq_ctrl =
2550                         container_of(q, struct mlx5_rxq_ctrl, rxq);
2551
2552                 if (!q)
2553                         continue;
2554                 memset((void *)rxq_ctrl->tunnel_types, 0,
2555                        sizeof(rxq_ctrl->tunnel_types));
2556                 q->tunnel = 0;
2557         }
2558 }
2559
2560 /**
2561  * Apply all flows in the list to the NIC.
2562  *
2563  * @param dev
2564  *   Pointer to Ethernet device.
2565  * @param list
2566  *   Pointer to a TAILQ flow list.
2567  *
2568  * @return
2569  *   0 on success, a negative errno value otherwise and rte_errno is set.
2570  */
2571 int
2572 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2573 {
2574         struct priv *priv = dev->data->dev_private;
2575         struct rte_flow *flow;
2576
2577         TAILQ_FOREACH(flow, list, next) {
2578                 unsigned int i;
2579
2580                 if (flow->drop) {
2581                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2582                                 mlx5_glue->create_flow
2583                                 (priv->flow_drop_queue->qp,
2584                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2585                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2586                                 DRV_LOG(DEBUG,
2587                                         "port %u flow %p cannot be applied",
2588                                         dev->data->port_id, (void *)flow);
2589                                 rte_errno = EINVAL;
2590                                 return -rte_errno;
2591                         }
2592                         DRV_LOG(DEBUG, "port %u flow %p applied",
2593                                 dev->data->port_id, (void *)flow);
2594                         /* Next flow. */
2595                         continue;
2596                 }
2597                 for (i = 0; i != hash_rxq_init_n; ++i) {
2598                         if (!flow->frxq[i].ibv_attr)
2599                                 continue;
2600                         flow->frxq[i].hrxq =
2601                                 mlx5_hrxq_get(dev, flow->rss_conf.key,
2602                                               flow->rss_conf.key_len,
2603                                               hash_rxq_init[i].hash_fields,
2604                                               flow->rss_conf.queue,
2605                                               flow->rss_conf.queue_num,
2606                                               flow->tunnel);
2607                         if (flow->frxq[i].hrxq)
2608                                 goto flow_create;
2609                         flow->frxq[i].hrxq =
2610                                 mlx5_hrxq_new(dev, flow->rss_conf.key,
2611                                               flow->rss_conf.key_len,
2612                                               hash_rxq_init[i].hash_fields,
2613                                               flow->rss_conf.queue,
2614                                               flow->rss_conf.queue_num,
2615                                               flow->tunnel);
2616                         if (!flow->frxq[i].hrxq) {
2617                                 DRV_LOG(DEBUG,
2618                                         "port %u flow %p cannot be applied",
2619                                         dev->data->port_id, (void *)flow);
2620                                 rte_errno = EINVAL;
2621                                 return -rte_errno;
2622                         }
2623 flow_create:
2624                         flow->frxq[i].ibv_flow =
2625                                 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2626                                                        flow->frxq[i].ibv_attr);
2627                         if (!flow->frxq[i].ibv_flow) {
2628                                 DRV_LOG(DEBUG,
2629                                         "port %u flow %p cannot be applied",
2630                                         dev->data->port_id, (void *)flow);
2631                                 rte_errno = EINVAL;
2632                                 return -rte_errno;
2633                         }
2634                         DRV_LOG(DEBUG, "port %u flow %p applied",
2635                                 dev->data->port_id, (void *)flow);
2636                 }
2637                 mlx5_flow_create_update_rxqs(dev, flow);
2638         }
2639         return 0;
2640 }
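
/*
 * Usage sketch (illustrative, not part of the driver): mlx5_flow_start() is
 * intended for the device start path, re-applying every flow of a list that
 * was previously torn down by mlx5_flow_stop().  The caller below is
 * hypothetical; the real start sequence and error handling belong to the
 * port start code.
 *
 * @code
 * static int
 * example_start_flows(struct rte_eth_dev *dev)
 * {
 *         struct priv *priv = dev->data->dev_private;
 *
 *         // Re-apply control-plane flows first, then user flows.
 *         if (mlx5_flow_start(dev, &priv->ctrl_flows))
 *                 return -rte_errno;
 *         if (mlx5_flow_start(dev, &priv->flows)) {
 *                 mlx5_flow_stop(dev, &priv->ctrl_flows);
 *                 return -rte_errno;
 *         }
 *         return 0;
 * }
 * @endcode
 */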
2641
2642 /**
2643  * Verify the flow list is empty.
2644  *
2645  * @param dev
2646  *   Pointer to Ethernet device.
2647  *
2648  * @return The number of flows not released.
2649  */
2650 int
2651 mlx5_flow_verify(struct rte_eth_dev *dev)
2652 {
2653         struct priv *priv = dev->data->dev_private;
2654         struct rte_flow *flow;
2655         int ret = 0;
2656
2657         TAILQ_FOREACH(flow, &priv->flows, next) {
2658                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2659                         dev->data->port_id, (void *)flow);
2660                 ++ret;
2661         }
2662         return ret;
2663 }
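
/*
 * Usage sketch (illustrative): mlx5_flow_verify() only counts and logs
 * leftover flows, so a teardown path would typically flush both flow lists
 * first and then treat a non-zero return value as a leak.  The caller below
 * is hypothetical.
 *
 * @code
 * static void
 * example_close_flows(struct rte_eth_dev *dev)
 * {
 *         struct priv *priv = dev->data->dev_private;
 *         int remaining;
 *
 *         mlx5_flow_list_flush(dev, &priv->ctrl_flows);
 *         mlx5_flow_list_flush(dev, &priv->flows);
 *         remaining = mlx5_flow_verify(dev);
 *         if (remaining)
 *                 DRV_LOG(WARNING, "port %u %d flow(s) still referenced",
 *                         dev->data->port_id, remaining);
 * }
 * @endcode
 */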
2664
2665 /**
2666  * Enable a control flow configured from the control plane.
2667  *
2668  * @param dev
2669  *   Pointer to Ethernet device.
2670  * @param eth_spec
2671  *   An Ethernet flow spec to apply.
2672  * @param eth_mask
2673  *   An Ethernet flow mask to apply.
2674  * @param vlan_spec
2675  *   A VLAN flow spec to apply.
2676  * @param vlan_mask
2677  *   A VLAN flow mask to apply.
2678  *
2679  * @return
2680  *   0 on success, a negative errno value otherwise and rte_errno is set.
2681  */
2682 int
2683 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2684                     struct rte_flow_item_eth *eth_spec,
2685                     struct rte_flow_item_eth *eth_mask,
2686                     struct rte_flow_item_vlan *vlan_spec,
2687                     struct rte_flow_item_vlan *vlan_mask)
2688 {
2689         struct priv *priv = dev->data->dev_private;
2690         const struct rte_flow_attr attr = {
2691                 .ingress = 1,
2692                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2693         };
2694         struct rte_flow_item items[] = {
2695                 {
2696                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2697                         .spec = eth_spec,
2698                         .last = NULL,
2699                         .mask = eth_mask,
2700                 },
2701                 {
2702                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2703                                 RTE_FLOW_ITEM_TYPE_END,
2704                         .spec = vlan_spec,
2705                         .last = NULL,
2706                         .mask = vlan_mask,
2707                 },
2708                 {
2709                         .type = RTE_FLOW_ITEM_TYPE_END,
2710                 },
2711         };
2712         uint16_t queue[priv->reta_idx_n];
2713         struct rte_flow_action_rss action_rss = {
2714                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2715                 .level = 0,
2716                 .types = priv->rss_conf.rss_hf,
2717                 .key_len = priv->rss_conf.rss_key_len,
2718                 .queue_num = priv->reta_idx_n,
2719                 .key = priv->rss_conf.rss_key,
2720                 .queue = queue,
2721         };
2722         struct rte_flow_action actions[] = {
2723                 {
2724                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2725                         .conf = &action_rss,
2726                 },
2727                 {
2728                         .type = RTE_FLOW_ACTION_TYPE_END,
2729                 },
2730         };
2731         struct rte_flow *flow;
2732         struct rte_flow_error error;
2733         unsigned int i;
2734
2735         if (!priv->reta_idx_n) {
2736                 rte_errno = EINVAL;
2737                 return -rte_errno;
2738         }
2739         for (i = 0; i != priv->reta_idx_n; ++i)
2740                 queue[i] = (*priv->reta_idx)[i];
2741         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2742                                      actions, &error);
2743         if (!flow)
2744                 return -rte_errno;
2745         return 0;
2746 }
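
/*
 * Usage sketch (illustrative): a traffic-enable path can use
 * mlx5_ctrl_flow_vlan() to restrict a control flow to a single VLAN.  The
 * snippet below matches broadcast frames on VLAN 42; the VLAN ID, the mask
 * choice and the surrounding code are hypothetical.
 *
 * @code
 * struct rte_flow_item_eth bcast = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * struct rte_flow_item_vlan vlan_spec = {
 *         .tci = rte_cpu_to_be_16(42),
 * };
 * struct rte_flow_item_vlan vlan_mask = {
 *         .tci = rte_cpu_to_be_16(0x0fff),
 * };
 *
 * if (mlx5_ctrl_flow_vlan(dev, &bcast, &bcast, &vlan_spec, &vlan_mask))
 *         DRV_LOG(ERR, "port %u cannot enable broadcast VLAN flow",
 *                 dev->data->port_id);
 * @endcode
 */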
2747
2748 /**
2749  * Enable a control flow configured from the control plane.
2750  *
2751  * @param dev
2752  *   Pointer to Ethernet device.
2753  * @param eth_spec
2754  *   An Ethernet flow spec to apply.
2755  * @param eth_mask
2756  *   An Ethernet flow mask to apply.
2757  *
2758  * @return
2759  *   0 on success, a negative errno value otherwise and rte_errno is set.
2760  */
2761 int
2762 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2763                struct rte_flow_item_eth *eth_spec,
2764                struct rte_flow_item_eth *eth_mask)
2765 {
2766         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2767 }
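
/*
 * Usage sketch (illustrative): without a VLAN constraint a control flow for
 * a single destination MAC address is a one-liner.  The address and error
 * handling below are hypothetical.
 *
 * @code
 * struct rte_flow_item_eth unicast = {
 *         .dst.addr_bytes = "\x02\x00\x00\x00\x00\x01",
 * };
 * struct rte_flow_item_eth unicast_mask = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * if (mlx5_ctrl_flow(dev, &unicast, &unicast_mask))
 *         DRV_LOG(ERR, "port %u cannot enable unicast flow",
 *                 dev->data->port_id);
 * @endcode
 */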
2768
2769 /**
2770  * Destroy a flow.
2771  *
2772  * @see rte_flow_destroy()
2773  * @see rte_flow_ops
2774  */
2775 int
2776 mlx5_flow_destroy(struct rte_eth_dev *dev,
2777                   struct rte_flow *flow,
2778                   struct rte_flow_error *error __rte_unused)
2779 {
2780         struct priv *priv = dev->data->dev_private;
2781
2782         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2783         return 0;
2784 }
2785
2786 /**
2787  * Destroy all flows.
2788  *
2789  * @see rte_flow_flush()
2790  * @see rte_flow_ops
2791  */
2792 int
2793 mlx5_flow_flush(struct rte_eth_dev *dev,
2794                 struct rte_flow_error *error __rte_unused)
2795 {
2796         struct priv *priv = dev->data->dev_private;
2797
2798         mlx5_flow_list_flush(dev, &priv->flows);
2799         return 0;
2800 }
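
/*
 * Usage sketch (illustrative): this is the backend of rte_flow_flush(), so
 * applications go through the generic API; the port number below is
 * hypothetical.
 *
 * @code
 * struct rte_flow_error error;
 *
 * if (rte_flow_flush(port_id, &error))
 *         printf("cannot flush flows: %s\n",
 *                error.message ? error.message : "(no message)");
 * @endcode
 */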
2801
2802 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2803 /**
2804  * Query flow counter.
2805  *
2806  * @param cs
2807  *   The counter set to query.
2808  * @param counter_stats
2809  *   Last counter values read, used to compute the returned delta.
2810  *
2811  * @return
2812  *   0 on success, a negative errno value otherwise and rte_errno is set.
2813  */
2814 static int
2815 mlx5_flow_query_count(struct ibv_counter_set *cs,
2816                       struct mlx5_flow_counter_stats *counter_stats,
2817                       struct rte_flow_query_count *query_count,
2818                       struct rte_flow_error *error)
2819 {
2820         uint64_t counters[2];
2821         struct ibv_query_counter_set_attr query_cs_attr = {
2822                 .cs = cs,
2823                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2824         };
2825         struct ibv_counter_set_data query_out = {
2826                 .out = counters,
2827                 .outlen = 2 * sizeof(uint64_t),
2828         };
2829         int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2830
2831         if (err)
2832                 return rte_flow_error_set(error, err,
2833                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2834                                           NULL,
2835                                           "cannot read counter");
2836         query_count->hits_set = 1;
2837         query_count->bytes_set = 1;
2838         query_count->hits = counters[0] - counter_stats->hits;
2839         query_count->bytes = counters[1] - counter_stats->bytes;
2840         if (query_count->reset) {
2841                 counter_stats->hits = counters[0];
2842                 counter_stats->bytes = counters[1];
2843         }
2844         return 0;
2845 }
2846
2847 /**
2848  * Query a flow.
2849  *
2850  * @see rte_flow_query()
2851  * @see rte_flow_ops
2852  */
2853 int
2854 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2855                 struct rte_flow *flow,
2856                 enum rte_flow_action_type action __rte_unused,
2857                 void *data,
2858                 struct rte_flow_error *error)
2859 {
2860         if (flow->cs) {
2861                 int ret;
2862
2863                 ret = mlx5_flow_query_count(flow->cs,
2864                                             &flow->counter_stats,
2865                                             (struct rte_flow_query_count *)data,
2866                                             error);
2867                 if (ret)
2868                         return ret;
2869         } else {
2870                 return rte_flow_error_set(error, EINVAL,
2871                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2872                                           NULL,
2873                                           "no counter found for flow");
2874         }
2875         return 0;
2876 }
2877 #endif
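
/*
 * Usage sketch (illustrative): an application reaches mlx5_flow_query()
 * through rte_flow_query() on a flow created with a COUNT action, and only
 * when counter support is compiled in.  The port number, flow handle and
 * error handling below are hypothetical.
 *
 * @code
 * struct rte_flow_query_count query = { .reset = 1 };
 * struct rte_flow_error error;
 *
 * if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                    &query, &error))
 *         printf("query failed: %s\n",
 *                error.message ? error.message : "(no message)");
 * else if (query.hits_set && query.bytes_set)
 *         printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
 *                query.hits, query.bytes);
 * @endcode
 */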
2878
2879 /**
2880  * Isolated mode.
2881  *
2882  * @see rte_flow_isolate()
2883  * @see rte_flow_ops
2884  */
2885 int
2886 mlx5_flow_isolate(struct rte_eth_dev *dev,
2887                   int enable,
2888                   struct rte_flow_error *error)
2889 {
2890         struct priv *priv = dev->data->dev_private;
2891
2892         if (dev->data->dev_started) {
2893                 rte_flow_error_set(error, EBUSY,
2894                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2895                                    NULL,
2896                                    "port must be stopped first");
2897                 return -rte_errno;
2898         }
2899         priv->isolated = !!enable;
2900         if (enable)
2901                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2902         else
2903                 priv->dev->dev_ops = &mlx5_dev_ops;
2904         return 0;
2905 }
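
/*
 * Usage sketch (illustrative): isolated mode is requested through
 * rte_flow_isolate() while the port is stopped, typically right after
 * rte_eth_dev_configure() and before rte_eth_dev_start().  The port number
 * below is hypothetical.
 *
 * @code
 * struct rte_flow_error error;
 *
 * if (rte_flow_isolate(port_id, 1, &error))
 *         printf("cannot enter isolated mode: %s\n",
 *                error.message ? error.message : "(no message)");
 * @endcode
 */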
2906
2907 /**
2908  * Convert a flow director filter to a generic flow.
2909  *
2910  * @param dev
2911  *   Pointer to Ethernet device.
2912  * @param fdir_filter
2913  *   Flow director filter to add.
2914  * @param attributes
2915  *   Generic flow parameters structure.
2916  *
2917  * @return
2918  *   0 on success, a negative errno value otherwise and rte_errno is set.
2919  */
2920 static int
2921 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2922                          const struct rte_eth_fdir_filter *fdir_filter,
2923                          struct mlx5_fdir *attributes)
2924 {
2925         struct priv *priv = dev->data->dev_private;
2926         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2927         const struct rte_eth_fdir_masks *mask =
2928                 &dev->data->dev_conf.fdir_conf.mask;
2929
2930         /* Validate queue number. */
2931         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2932                 DRV_LOG(ERR, "port %u invalid queue number %d",
2933                         dev->data->port_id, fdir_filter->action.rx_queue);
2934                 rte_errno = EINVAL;
2935                 return -rte_errno;
2936         }
2937         attributes->attr.ingress = 1;
2938         attributes->items[0] = (struct rte_flow_item) {
2939                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2940                 .spec = &attributes->l2,
2941                 .mask = &attributes->l2_mask,
2942         };
2943         switch (fdir_filter->action.behavior) {
2944         case RTE_ETH_FDIR_ACCEPT:
2945                 attributes->actions[0] = (struct rte_flow_action){
2946                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2947                         .conf = &attributes->queue,
2948                 };
2949                 break;
2950         case RTE_ETH_FDIR_REJECT:
2951                 attributes->actions[0] = (struct rte_flow_action){
2952                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2953                 };
2954                 break;
2955         default:
2956                 DRV_LOG(ERR, "port %u invalid behavior %d",
2957                         dev->data->port_id,
2958                         fdir_filter->action.behavior);
2959                 rte_errno = ENOTSUP;
2960                 return -rte_errno;
2961         }
2962         attributes->queue.index = fdir_filter->action.rx_queue;
2963         /* Handle L3. */
2964         switch (fdir_filter->input.flow_type) {
2965         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2966         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2967         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2968                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2969                         .src_addr = input->flow.ip4_flow.src_ip,
2970                         .dst_addr = input->flow.ip4_flow.dst_ip,
2971                         .time_to_live = input->flow.ip4_flow.ttl,
2972                         .type_of_service = input->flow.ip4_flow.tos,
2973                         .next_proto_id = input->flow.ip4_flow.proto,
2974                 };
2975                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2976                         .src_addr = mask->ipv4_mask.src_ip,
2977                         .dst_addr = mask->ipv4_mask.dst_ip,
2978                         .time_to_live = mask->ipv4_mask.ttl,
2979                         .type_of_service = mask->ipv4_mask.tos,
2980                         .next_proto_id = mask->ipv4_mask.proto,
2981                 };
2982                 attributes->items[1] = (struct rte_flow_item){
2983                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2984                         .spec = &attributes->l3,
2985                         .mask = &attributes->l3_mask,
2986                 };
2987                 break;
2988         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2989         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2990         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2991                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2992                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2993                         .proto = input->flow.ipv6_flow.proto,
2994                 };
2995
2996                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2997                        input->flow.ipv6_flow.src_ip,
2998                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2999                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3000                        input->flow.ipv6_flow.dst_ip,
3001                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3002                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3003                        mask->ipv6_mask.src_ip,
3004                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3005                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3006                        mask->ipv6_mask.dst_ip,
3007                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3008                 attributes->items[1] = (struct rte_flow_item){
3009                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3010                         .spec = &attributes->l3,
3011                         .mask = &attributes->l3_mask,
3012                 };
3013                 break;
3014         default:
3015                 DRV_LOG(ERR, "port %u invalid flow type %d",
3016                         dev->data->port_id, fdir_filter->input.flow_type);
3017                 rte_errno = ENOTSUP;
3018                 return -rte_errno;
3019         }
3020         /* Handle L4. */
3021         switch (fdir_filter->input.flow_type) {
3022         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3023                 attributes->l4.udp.hdr = (struct udp_hdr){
3024                         .src_port = input->flow.udp4_flow.src_port,
3025                         .dst_port = input->flow.udp4_flow.dst_port,
3026                 };
3027                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3028                         .src_port = mask->src_port_mask,
3029                         .dst_port = mask->dst_port_mask,
3030                 };
3031                 attributes->items[2] = (struct rte_flow_item){
3032                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3033                         .spec = &attributes->l4,
3034                         .mask = &attributes->l4_mask,
3035                 };
3036                 break;
3037         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3038                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3039                         .src_port = input->flow.tcp4_flow.src_port,
3040                         .dst_port = input->flow.tcp4_flow.dst_port,
3041                 };
3042                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3043                         .src_port = mask->src_port_mask,
3044                         .dst_port = mask->dst_port_mask,
3045                 };
3046                 attributes->items[2] = (struct rte_flow_item){
3047                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3048                         .spec = &attributes->l4,
3049                         .mask = &attributes->l4_mask,
3050                 };
3051                 break;
3052         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3053                 attributes->l4.udp.hdr = (struct udp_hdr){
3054                         .src_port = input->flow.udp6_flow.src_port,
3055                         .dst_port = input->flow.udp6_flow.dst_port,
3056                 };
3057                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3058                         .src_port = mask->src_port_mask,
3059                         .dst_port = mask->dst_port_mask,
3060                 };
3061                 attributes->items[2] = (struct rte_flow_item){
3062                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3063                         .spec = &attributes->l4,
3064                         .mask = &attributes->l4_mask,
3065                 };
3066                 break;
3067         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3068                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3069                         .src_port = input->flow.tcp6_flow.src_port,
3070                         .dst_port = input->flow.tcp6_flow.dst_port,
3071                 };
3072                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3073                         .src_port = mask->src_port_mask,
3074                         .dst_port = mask->dst_port_mask,
3075                 };
3076                 attributes->items[2] = (struct rte_flow_item){
3077                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3078                         .spec = &attributes->l4,
3079                         .mask = &attributes->l4_mask,
3080                 };
3081                 break;
3082         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3083         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3084                 break;
3085         default:
3086                 DRV_LOG(ERR, "port %u invalid flow type %d",
3087                         dev->data->port_id, fdir_filter->input.flow_type);
3088                 rte_errno = ENOTSUP;
3089                 return -rte_errno;
3090         }
3091         return 0;
3092 }
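
/*
 * Illustrative input (not part of the driver): a perfect-mode flow director
 * filter such as the one below, once passed to mlx5_fdir_filter_convert(),
 * becomes the generic ETH / IPV4 / UDP pattern with a QUEUE action built
 * above.  Addresses, ports and the queue index are hypothetical.
 *
 * @code
 * struct rte_eth_fdir_filter fdir = {
 *         .input = {
 *                 .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                 .flow.udp4_flow = {
 *                         .ip = {
 *                                 // 192.168.0.1 -> 192.168.0.2, big endian.
 *                                 .src_ip = rte_cpu_to_be_32(0xc0a80001),
 *                                 .dst_ip = rte_cpu_to_be_32(0xc0a80002),
 *                         },
 *                         .src_port = rte_cpu_to_be_16(1234),
 *                         .dst_port = rte_cpu_to_be_16(4321),
 *                 },
 *         },
 *         .action = {
 *                 .rx_queue = 3,
 *                 .behavior = RTE_ETH_FDIR_ACCEPT,
 *         },
 * };
 * @endcode
 */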
3093
3094 /**
3095  * Add a new flow director filter and store it in the list.
3096  *
3097  * @param dev
3098  *   Pointer to Ethernet device.
3099  * @param fdir_filter
3100  *   Flow director filter to add.
3101  *
3102  * @return
3103  *   0 on success, a negative errno value otherwise and rte_errno is set.
3104  */
3105 static int
3106 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3107                      const struct rte_eth_fdir_filter *fdir_filter)
3108 {
3109         struct priv *priv = dev->data->dev_private;
3110         struct mlx5_fdir attributes = {
3111                 .attr.group = 0,
3112                 .l2_mask = {
3113                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3114                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3115                         .type = 0,
3116                 },
3117         };
3118         struct mlx5_flow_parse parser = {
3119                 .layer = HASH_RXQ_ETH,
3120         };
3121         struct rte_flow_error error;
3122         struct rte_flow *flow;
3123         int ret;
3124
3125         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3126         if (ret)
3127                 return ret;
3128         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3129                                 attributes.actions, &error, &parser);
3130         if (ret)
3131                 return ret;
3132         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3133                                      attributes.items, attributes.actions,
3134                                      &error);
3135         if (flow) {
3136                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3137                         (void *)flow);
3138                 return 0;
3139         }
3140         return -rte_errno;
3141 }
3142
3143 /**
3144  * Delete a specific filter.
3145  *
3146  * @param dev
3147  *   Pointer to Ethernet device.
3148  * @param fdir_filter
3149  *   Filter to be deleted.
3150  *
3151  * @return
3152  *   0 on success, a negative errno value otherwise and rte_errno is set.
3153  */
3154 static int
3155 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3156                         const struct rte_eth_fdir_filter *fdir_filter)
3157 {
3158         struct priv *priv = dev->data->dev_private;
3159         struct mlx5_fdir attributes = {
3160                 .attr.group = 0,
3161         };
3162         struct mlx5_flow_parse parser = {
3163                 .create = 1,
3164                 .layer = HASH_RXQ_ETH,
3165         };
3166         struct rte_flow_error error;
3167         struct rte_flow *flow;
3168         unsigned int i;
3169         int ret;
3170
3171         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3172         if (ret)
3173                 return ret;
3174         ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3175                                 attributes.actions, &error, &parser);
3176         if (ret)
3177                 goto exit;
3178         /*
3179          * Special case for the drop action, which is only appended to the
3180          * specifications when a flow is created.  The filter parsed here
3181          * lacks it, so add it before comparing against existing flows.
3182          */
3183         if (parser.drop) {
3184                 struct ibv_flow_spec_action_drop *drop;
3185
3186                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3187                                 parser.queue[HASH_RXQ_ETH].offset);
3188                 *drop = (struct ibv_flow_spec_action_drop){
3189                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3190                         .size = sizeof(struct ibv_flow_spec_action_drop),
3191                 };
3192                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3193         }
3194         TAILQ_FOREACH(flow, &priv->flows, next) {
3195                 struct ibv_flow_attr *attr;
3196                 struct ibv_spec_header *attr_h;
3197                 void *spec;
3198                 struct ibv_flow_attr *flow_attr;
3199                 struct ibv_spec_header *flow_h;
3200                 void *flow_spec;
3201                 unsigned int specs_n;
3202
3203                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3204                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3205                 /* Compare first the attributes. */
3206                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3207                         continue;
3208                 if (attr->num_of_specs == 0)
3209                         continue;
3210                 spec = (void *)((uintptr_t)attr +
3211                                 sizeof(struct ibv_flow_attr));
3212                 flow_spec = (void *)((uintptr_t)flow_attr +
3213                                      sizeof(struct ibv_flow_attr));
3214                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3215                 for (i = 0; i != specs_n; ++i) {
3216                         attr_h = spec;
3217                         flow_h = flow_spec;
3218                         if (memcmp(spec, flow_spec,
3219                                    RTE_MIN(attr_h->size, flow_h->size)))
3220                                 goto wrong_flow;
3221                         spec = (void *)((uintptr_t)spec + attr_h->size);
3222                         flow_spec = (void *)((uintptr_t)flow_spec +
3223                                              flow_h->size);
3224                 }
3225                 /* At this point, the flow matches. */
3226                 break;
3227 wrong_flow:
3228                 /* The flow does not match. */
3229                 continue;
3230         }
3231         ret = rte_errno; /* Save rte_errno before cleanup. */
3232         if (flow)
3233                 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3234 exit:
3235         for (i = 0; i != hash_rxq_init_n; ++i) {
3236                 if (parser.queue[i].ibv_attr)
3237                         rte_free(parser.queue[i].ibv_attr);
3238         }
3239         rte_errno = ret; /* Restore rte_errno. */
3240         return -rte_errno;
3241 }
3242
3243 /**
3244  * Update a specific filter by deleting and re-adding it.
3245  *
3246  * @param dev
3247  *   Pointer to Ethernet device.
3248  * @param fdir_filter
3249  *   Filter to be updated.
3250  *
3251  * @return
3252  *   0 on success, a negative errno value otherwise and rte_errno is set.
3253  */
3254 static int
3255 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3256                         const struct rte_eth_fdir_filter *fdir_filter)
3257 {
3258         int ret;
3259
3260         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3261         if (ret)
3262                 return ret;
3263         return mlx5_fdir_filter_add(dev, fdir_filter);
3264 }
3265
3266 /**
3267  * Flush all filters.
3268  *
3269  * @param dev
3270  *   Pointer to Ethernet device.
3271  */
3272 static void
3273 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3274 {
3275         struct priv *priv = dev->data->dev_private;
3276
3277         mlx5_flow_list_flush(dev, &priv->flows);
3278 }
3279
3280 /**
3281  * Get flow director information.
3282  *
3283  * @param dev
3284  *   Pointer to Ethernet device.
3285  * @param[out] fdir_info
3286  *   Resulting flow director information.
3287  */
3288 static void
3289 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3290 {
3291         struct priv *priv = dev->data->dev_private;
3292         struct rte_eth_fdir_masks *mask =
3293                 &priv->dev->data->dev_conf.fdir_conf.mask;
3294
3295         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3296         fdir_info->guarant_spc = 0;
3297         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3298         fdir_info->max_flexpayload = 0;
3299         fdir_info->flow_types_mask[0] = 0;
3300         fdir_info->flex_payload_unit = 0;
3301         fdir_info->max_flex_payload_segment_num = 0;
3302         fdir_info->flex_payload_limit = 0;
3303         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3304 }
3305
3306 /**
3307  * Deal with flow director operations.
3308  *
3309  * @param dev
3310  *   Pointer to Ethernet device.
3311  * @param filter_op
3312  *   Operation to perform.
3313  * @param arg
3314  *   Pointer to operation-specific structure.
3315  *
3316  * @return
3317  *   0 on success, a negative errno value otherwise and rte_errno is set.
3318  */
3319 static int
3320 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3321                     void *arg)
3322 {
3323         struct priv *priv = dev->data->dev_private;
3324         enum rte_fdir_mode fdir_mode =
3325                 priv->dev->data->dev_conf.fdir_conf.mode;
3326
3327         if (filter_op == RTE_ETH_FILTER_NOP)
3328                 return 0;
3329         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3330             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3331                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3332                         dev->data->port_id, fdir_mode);
3333                 rte_errno = EINVAL;
3334                 return -rte_errno;
3335         }
3336         switch (filter_op) {
3337         case RTE_ETH_FILTER_ADD:
3338                 return mlx5_fdir_filter_add(dev, arg);
3339         case RTE_ETH_FILTER_UPDATE:
3340                 return mlx5_fdir_filter_update(dev, arg);
3341         case RTE_ETH_FILTER_DELETE:
3342                 return mlx5_fdir_filter_delete(dev, arg);
3343         case RTE_ETH_FILTER_FLUSH:
3344                 mlx5_fdir_filter_flush(dev);
3345                 break;
3346         case RTE_ETH_FILTER_INFO:
3347                 mlx5_fdir_info_get(dev, arg);
3348                 break;
3349         default:
3350                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3351                         dev->data->port_id, filter_op);
3352                 rte_errno = EINVAL;
3353                 return -rte_errno;
3354         }
3355         return 0;
3356 }
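
/*
 * Usage sketch (illustrative): applications reach mlx5_fdir_ctrl_func()
 * through the legacy filter control API.  The filter is assumed to be filled
 * in as in the rte_eth_fdir_filter example shown after
 * mlx5_fdir_filter_convert(); the port number is hypothetical.
 *
 * @code
 * if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                             RTE_ETH_FILTER_ADD, &fdir))
 *         printf("cannot add flow director filter\n");
 * @endcode
 */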
3357
3358 /**
3359  * Manage filter operations.
3360  *
3361  * @param dev
3362  *   Pointer to Ethernet device structure.
3363  * @param filter_type
3364  *   Filter type.
3365  * @param filter_op
3366  *   Operation to perform.
3367  * @param arg
3368  *   Pointer to operation-specific structure.
3369  *
3370  * @return
3371  *   0 on success, a negative errno value otherwise and rte_errno is set.
3372  */
3373 int
3374 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3375                      enum rte_filter_type filter_type,
3376                      enum rte_filter_op filter_op,
3377                      void *arg)
3378 {
3379         switch (filter_type) {
3380         case RTE_ETH_FILTER_GENERIC:
3381                 if (filter_op != RTE_ETH_FILTER_GET) {
3382                         rte_errno = EINVAL;
3383                         return -rte_errno;
3384                 }
3385                 *(const void **)arg = &mlx5_flow_ops;
3386                 return 0;
3387         case RTE_ETH_FILTER_FDIR:
3388                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3389         default:
3390                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3391                         dev->data->port_id, filter_type);
3392                 rte_errno = ENOTSUP;
3393                 return -rte_errno;
3394         }
3395         return 0;
3396 }
3397
3398 /**
3399  * Detect number of Verbs flow priorities supported.
3400  *
3401  * @param dev
3402  *   Pointer to Ethernet device.
3403  *
3404  * @return
3405  *   Number of supported Verbs flow priorities.
3406  */
3407 unsigned int
3408 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3409 {
3410         struct priv *priv = dev->data->dev_private;
3411         unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3412         struct {
3413                 struct ibv_flow_attr attr;
3414                 struct ibv_flow_spec_eth eth;
3415                 struct ibv_flow_spec_action_drop drop;
3416         } flow_attr = {
3417                 .attr = {
3418                         .num_of_specs = 2,
3419                 },
3420                 .eth = {
3421                         .type = IBV_FLOW_SPEC_ETH,
3422                         .size = sizeof(struct ibv_flow_spec_eth),
3423                 },
3424                 .drop = {
3425                         .size = sizeof(struct ibv_flow_spec_action_drop),
3426                         .type = IBV_FLOW_SPEC_ACTION_DROP,
3427                 },
3428         };
3429         struct ibv_flow *flow;
3430
3431         do {
3432                 flow_attr.attr.priority = verb_priorities - 1;
3433                 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3434                                               &flow_attr.attr);
3435                 if (flow) {
3436                         claim_zero(mlx5_glue->destroy_flow(flow));
3437                         /* Try more priorities. */
3438                         verb_priorities *= 2;
3439                 } else {
3440                         /* Failed, restore the last working number. */
3441                         verb_priorities /= 2;
3442                         break;
3443                 }
3444         } while (1);
3445         DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3446                 " user flow priorities: %d",
3447                 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3448         return verb_priorities;
3449 }
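
/*
 * Usage sketch (illustrative): the probe above doubles the priority count
 * (8, 16, 32, ...) until flow creation fails, then halves back to the last
 * working value.  A start path would typically cache the result once the
 * drop queue exists; the storage field below is purely hypothetical.
 *
 * @code
 * unsigned int max_prio = mlx5_get_max_verbs_prio(dev);
 *
 * // Hypothetical field, only to illustrate keeping the probed value around.
 * priv->example_max_verbs_prio = max_prio;
 * @endcode
 */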