net/mlx5: remove parser/flow drop queue
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
        int dummy;
};
struct ibv_flow_spec_counter_action {
        int dummy;
};
struct ibv_counter_set {
        int dummy;
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
        (void)cs;
        return -ENOTSUP;
}
#endif

/* Dev ops structures defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};
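
/*
 * Note (assumed Verbs semantics, not stated in this file): lower
 * flow_priority values are matched first, so L4 flows (priority 0) take
 * precedence over L3 flows (priority 1) and the L2 catch-all (priority 2);
 * the most specific layer always wins.
 */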

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for the drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
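
/*
 * For instance, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4) expands to the compound
 * literal:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * i.e. an END-terminated list usable as the .items field below.
 */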

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-mask size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
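
/*
 * The graph above encodes which item may follow which: starting from END,
 * a pattern such as ETH / IPV4 / UDP / VXLAN / ETH (the tunnel's inner
 * Ethernet header) only walks edges listed in .items, so any other
 * ordering is rejected during validation.
 */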

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, nonzero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
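
/*
 * The byte-wise check above enforces (spec[i] | mask[i]) == mask[i], i.e.
 * the request must not match on any bit outside the supported mask. For
 * example, a VLAN item whose spec sets fields other than TCI is rejected,
 * since the VLAN entry in mlx5_flow_items[] only exposes .tci.
 */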

/**
 * Copy the RSS configuration from the user-provided one.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        const struct rte_eth_rss_conf *rss =
                rss_conf ? rss_conf : &priv->rss_conf;

        if (rss->rss_key_len > 40)
                return EINVAL;
        parser->rss_conf.rss_key_len = rss->rss_key_len;
        parser->rss_conf.rss_hf = rss->rss_hf;
        memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
        parser->rss_conf.rss_key = parser->rss_key;
        return 0;
}

/**
 * Validate flow rule attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

/**
 * Extract the requested actions into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add the default RSS configuration, which Verbs requires to create
         * a QP even when no RSS is requested.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->counter_set_supported) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}
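
/*
 * Illustrative example (not part of the driver): a minimal action list the
 * parser above accepts, steering matched packets to Rx queue 3.
 *
 *   struct rte_flow_action_queue queue = { .index = 3 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 */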

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after the Verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop || parser->queues_n == 1) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}

/**
 * Allocate memory space to store Verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A Verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise Verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * This layer becomes useless as the pattern defines more
                 * specific layers below it.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flows according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                             (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in the Verbs specifications, or compute the
         * correct offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}
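
/*
 * Illustrative walk-through: for an ETH / IPV4 / TCP pattern, parser->layer
 * ends up as HASH_RXQ_TCPV4, so the Ethernet and IPv6-based attributes are
 * released above; whether the UDPv4/IPv4 variants survive then depends on
 * the configured RSS hash fields.
 */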

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store Verbs specifications.
         */
        if (parser->drop || parser->queues_n == 1) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Convert the items, filling the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete the missing specifications to match the RSS
         * configuration.
         */
        if (parser->queues_n > 1) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                /*
                 * Queue actions have their priority overridden with the
                 * Ethernet layer priority; it needs to be adjusted to the
                 * priority of the most specific layer matched.
                 */
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
exit_free:
        /* When only validation is expected, all resources must be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter.");
        return rte_errno;
}

/**
 * Copy the created specification into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                        parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}
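
/*
 * Illustrative example: a TCP specification built while parser->layer is
 * HASH_RXQ_TCPV4 is appended to every remaining IPv4-based attribute and
 * to HASH_RXQ_ETH (ip_version == 0), but never to an IPv6-based one.
 */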

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update the layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
        return 0;
}
1267
1268 /**
1269  * Convert VLAN item to Verbs specification.
1270  *
1271  * @param item[in]
1272  *   Item specification.
1273  * @param default_mask[in]
1274  *   Default bit-masks to use when item->mask is not provided.
1275  * @param data[in, out]
1276  *   User structure.
1277  */
1278 static int
1279 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1280                       const void *default_mask,
1281                       void *data)
1282 {
1283         const struct rte_flow_item_vlan *spec = item->spec;
1284         const struct rte_flow_item_vlan *mask = item->mask;
1285         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1286         struct ibv_flow_spec_eth *eth;
1287         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1288
1289         if (spec) {
1290                 unsigned int i;
1291                 if (!mask)
1292                         mask = default_mask;
1293
1294                 for (i = 0; i != hash_rxq_init_n; ++i) {
1295                         if (!parser->queue[i].ibv_attr)
1296                                 continue;
1297
1298                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1299                                        parser->queue[i].offset - eth_size);
1300                         eth->val.vlan_tag = spec->tci;
1301                         eth->mask.vlan_tag = mask->tci;
1302                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1303                 }
1304         }
1305         return 0;
1306 }
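
/*
 * Illustrative sketch (editorial): a VLAN item matching VLAN ID 100.
 * Note that the function above emits no new Verbs spec; it patches the
 * vlan_tag field of the ETH spec already written by
 * mlx5_flow_create_eth(), so an ETH item must precede the VLAN item.
 * Masking the TCI with 0x0fff matches the VLAN ID only:
 *
 *     const struct rte_flow_item_vlan vlan_spec = {
 *             .tci = rte_cpu_to_be_16(100),
 *     };
 *     const struct rte_flow_item_vlan vlan_mask = {
 *             .tci = rte_cpu_to_be_16(0x0fff),
 *     };
 */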
1307
1308 /**
1309  * Convert IPv4 item to Verbs specification.
1310  *
1311  * @param[in] item
1312  *   Item specification.
1313  * @param[in] default_mask
1314  *   Default bit-masks to use when item->mask is not provided.
1315  * @param[in, out] data
1316  *   User structure.
1317  */
1318 static int
1319 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1320                       const void *default_mask,
1321                       void *data)
1322 {
1323         const struct rte_flow_item_ipv4 *spec = item->spec;
1324         const struct rte_flow_item_ipv4 *mask = item->mask;
1325         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1326         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1327         struct ibv_flow_spec_ipv4_ext ipv4 = {
1328                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1329                 .size = ipv4_size,
1330         };
1331
1332         /* Don't update layer for the inner pattern. */
1333         if (!parser->inner)
1334                 parser->layer = HASH_RXQ_IPV4;
1335         if (spec) {
1336                 if (!mask)
1337                         mask = default_mask;
1338                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1339                         .src_ip = spec->hdr.src_addr,
1340                         .dst_ip = spec->hdr.dst_addr,
1341                         .proto = spec->hdr.next_proto_id,
1342                         .tos = spec->hdr.type_of_service,
1343                 };
1344                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1345                         .src_ip = mask->hdr.src_addr,
1346                         .dst_ip = mask->hdr.dst_addr,
1347                         .proto = mask->hdr.next_proto_id,
1348                         .tos = mask->hdr.type_of_service,
1349                 };
1350                 /* Remove unwanted bits from values. */
1351                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1352                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1353                 ipv4.val.proto &= ipv4.mask.proto;
1354                 ipv4.val.tos &= ipv4.mask.tos;
1355         }
1356         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1357         return 0;
1358 }
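
/*
 * Illustrative sketch (editorial): an IPv4 item matching destination
 * subnet 10.0.0.0/24. rte_flow carries addresses in network byte order,
 * which is also what the Verbs filter expects, hence the plain
 * assignments in the function above:
 *
 *     const struct rte_flow_item_ipv4 ipv4_spec = {
 *             .hdr.dst_addr = rte_cpu_to_be_32(IPv4(10, 0, 0, 0)),
 *     };
 *     const struct rte_flow_item_ipv4 ipv4_mask = {
 *             .hdr.dst_addr = rte_cpu_to_be_32(0xffffff00),
 *     };
 */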
1359
1360 /**
1361  * Convert IPv6 item to Verbs specification.
1362  *
1363  * @param[in] item
1364  *   Item specification.
1365  * @param[in] default_mask
1366  *   Default bit-masks to use when item->mask is not provided.
1367  * @param[in, out] data
1368  *   User structure.
1369  */
1370 static int
1371 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1372                       const void *default_mask,
1373                       void *data)
1374 {
1375         const struct rte_flow_item_ipv6 *spec = item->spec;
1376         const struct rte_flow_item_ipv6 *mask = item->mask;
1377         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1378         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1379         struct ibv_flow_spec_ipv6 ipv6 = {
1380                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1381                 .size = ipv6_size,
1382         };
1383
1384         /* Don't update layer for the inner pattern. */
1385         if (!parser->inner)
1386                 parser->layer = HASH_RXQ_IPV6;
1387         if (spec) {
1388                 unsigned int i;
1389
1390                 if (!mask)
1391                         mask = default_mask;
1392                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1393                        RTE_DIM(ipv6.val.src_ip));
1394                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1395                        RTE_DIM(ipv6.val.dst_ip));
1396                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1397                        RTE_DIM(ipv6.mask.src_ip));
1398                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1399                        RTE_DIM(ipv6.mask.dst_ip));
1400                 ipv6.mask.flow_label = mask->hdr.vtc_flow;
1401                 ipv6.mask.next_hdr = mask->hdr.proto;
1402                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1403                 /* Remove unwanted bits from values. */
1404                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1405                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1406                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1407                 }
1408                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1409                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1410                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1411         }
1412         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1413         return 0;
1414 }
1415
1416 /**
1417  * Convert UDP item to Verbs specification.
1418  *
1419  * @param[in] item
1420  *   Item specification.
1421  * @param[in] default_mask
1422  *   Default bit-masks to use when item->mask is not provided.
1423  * @param[in, out] data
1424  *   User structure.
1425  */
1426 static int
1427 mlx5_flow_create_udp(const struct rte_flow_item *item,
1428                      const void *default_mask,
1429                      void *data)
1430 {
1431         const struct rte_flow_item_udp *spec = item->spec;
1432         const struct rte_flow_item_udp *mask = item->mask;
1433         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1434         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1435         struct ibv_flow_spec_tcp_udp udp = {
1436                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1437                 .size = udp_size,
1438         };
1439
1440         /* Don't update layer for the inner pattern. */
1441         if (!parser->inner) {
1442                 if (parser->layer == HASH_RXQ_IPV4)
1443                         parser->layer = HASH_RXQ_UDPV4;
1444                 else
1445                         parser->layer = HASH_RXQ_UDPV6;
1446         }
1447         if (spec) {
1448                 if (!mask)
1449                         mask = default_mask;
1450                 udp.val.dst_port = spec->hdr.dst_port;
1451                 udp.val.src_port = spec->hdr.src_port;
1452                 udp.mask.dst_port = mask->hdr.dst_port;
1453                 udp.mask.src_port = mask->hdr.src_port;
1454                 /* Remove unwanted bits from values. */
1455                 udp.val.src_port &= udp.mask.src_port;
1456                 udp.val.dst_port &= udp.mask.dst_port;
1457         }
1458         mlx5_flow_create_copy(parser, &udp, udp_size);
1459         return 0;
1460 }
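
/*
 * Illustrative sketch (editorial): a UDP item matching destination port
 * 4789. Port numbers stay in network byte order end to end, so no
 * conversion happens above; mlx5_flow_create_tcp() below handles TCP
 * identically:
 *
 *     const struct rte_flow_item_udp udp_spec = {
 *             .hdr.dst_port = rte_cpu_to_be_16(4789),
 *     };
 *     const struct rte_flow_item_udp udp_mask = {
 *             .hdr.dst_port = rte_cpu_to_be_16(0xffff),
 *     };
 */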
1461
1462 /**
1463  * Convert TCP item to Verbs specification.
1464  *
1465  * @param[in] item
1466  *   Item specification.
1467  * @param[in] default_mask
1468  *   Default bit-masks to use when item->mask is not provided.
1469  * @param[in, out] data
1470  *   User structure.
1471  */
1472 static int
1473 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1474                      const void *default_mask,
1475                      void *data)
1476 {
1477         const struct rte_flow_item_tcp *spec = item->spec;
1478         const struct rte_flow_item_tcp *mask = item->mask;
1479         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1480         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1481         struct ibv_flow_spec_tcp_udp tcp = {
1482                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1483                 .size = tcp_size,
1484         };
1485
1486         /* Don't update layer for the inner pattern. */
1487         if (!parser->inner) {
1488                 if (parser->layer == HASH_RXQ_IPV4)
1489                         parser->layer = HASH_RXQ_TCPV4;
1490                 else
1491                         parser->layer = HASH_RXQ_TCPV6;
1492         }
1493         if (spec) {
1494                 if (!mask)
1495                         mask = default_mask;
1496                 tcp.val.dst_port = spec->hdr.dst_port;
1497                 tcp.val.src_port = spec->hdr.src_port;
1498                 tcp.mask.dst_port = mask->hdr.dst_port;
1499                 tcp.mask.src_port = mask->hdr.src_port;
1500                 /* Remove unwanted bits from values. */
1501                 tcp.val.src_port &= tcp.mask.src_port;
1502                 tcp.val.dst_port &= tcp.mask.dst_port;
1503         }
1504         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1505         return 0;
1506 }
1507
1508 /**
1509  * Convert VXLAN item to Verbs specification.
1510  *
1511  * @param[in] item
1512  *   Item specification.
1513  * @param[in] default_mask
1514  *   Default bit-masks to use when item->mask is not provided.
1515  * @param[in, out] data
1516  *   User structure.
1517  */
1518 static int
1519 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1520                        const void *default_mask,
1521                        void *data)
1522 {
1523         const struct rte_flow_item_vxlan *spec = item->spec;
1524         const struct rte_flow_item_vxlan *mask = item->mask;
1525         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1526         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1527         struct ibv_flow_spec_tunnel vxlan = {
1528                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1529                 .size = size,
1530         };
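        /*
         * Editorial note: scratch union splicing the 24-bit network-order
         * VNI from the item into the 32-bit tunnel id expected by Verbs;
         * byte 0 is cleared below and bytes 1-3 receive the VNI.
         */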
1531         union vni {
1532                 uint32_t vlan_id;
1533                 uint8_t vni[4];
1534         } id;
1535
1536         id.vni[0] = 0;
1537         parser->inner = IBV_FLOW_SPEC_INNER;
1538         if (spec) {
1539                 if (!mask)
1540                         mask = default_mask;
1541                 memcpy(&id.vni[1], spec->vni, 3);
1542                 vxlan.val.tunnel_id = id.vlan_id;
1543                 memcpy(&id.vni[1], mask->vni, 3);
1544                 vxlan.mask.tunnel_id = id.vlan_id;
1545                 /* Remove unwanted bits from values. */
1546                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1547         }
1548         /*
1549          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
1550          * this layer is defined in the Verbs specification, it is
1551          * interpreted as a wildcard and all packets will match this rule.
1552          * If it follows a full stack layer (e.g. eth / ipv4 / udp), all
1553          * packets matching the layers before will also match this rule.
1554          * To avoid such a situation, VNI 0 is currently refused.
1555          */
1556         if (!vxlan.val.tunnel_id)
1557                 return EINVAL;
1558         mlx5_flow_create_copy(parser, &vxlan, size);
1559         return 0;
1560 }
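
/*
 * Illustrative sketch (editorial): a VXLAN item matching VNI 42, carried
 * as three bytes in network order. VNI 0 is refused above because it
 * would degenerate into a wildcard:
 *
 *     const struct rte_flow_item_vxlan vxlan_spec = {
 *             .vni = { 0, 0, 42 },
 *     };
 */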
1561
1562 /**
1563  * Convert mark/flag action to Verbs specification.
1564  *
1565  * @param parser
1566  *   Internal parser structure.
1567  * @param mark_id
1568  *   Mark identifier.
1569  */
1570 static int
1571 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1572 {
1573         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1574         struct ibv_flow_spec_action_tag tag = {
1575                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1576                 .size = size,
1577                 .tag_id = mlx5_flow_mark_set(mark_id),
1578         };
1579
1580         assert(parser->mark);
1581         mlx5_flow_create_copy(parser, &tag, size);
1582         return 0;
1583 }
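
/*
 * Illustrative sketch (editorial): the application-side action that leads
 * here. The mark value (42 is arbitrary) is reported back on received
 * packets through mbuf->hash.fdir.hi with the PKT_RX_FDIR_ID flag set:
 *
 *     const struct rte_flow_action_mark mark = { .id = 42 };
 *     const struct rte_flow_action action = {
 *             .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &mark,
 *     };
 */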
1584
1585 /**
1586  * Convert count action to Verbs specification.
1587  *
1588  * @param priv
1589  *   Pointer to private structure.
1590  * @param parser
1591  *   Pointer to MLX5 flow parser structure.
1592  *
1593  * @return
1594  *   0 on success, errno value on failure.
1595  */
1596 static int
1597 mlx5_flow_create_count(struct priv *priv __rte_unused,
1598                        struct mlx5_flow_parse *parser __rte_unused)
1599 {
1600 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1601         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1602         struct ibv_counter_set_init_attr init_attr = {0};
1603         struct ibv_flow_spec_counter_action counter = {
1604                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1605                 .size = size,
1606                 .counter_set_handle = 0,
1607         };
1608
1609         init_attr.counter_set_id = 0;
1610         parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1611         if (!parser->cs)
1612                 return EINVAL;
1613         counter.counter_set_handle = parser->cs->handle;
1614         mlx5_flow_create_copy(parser, &counter, size);
1615 #endif
1616         return 0;
1617 }
1618
1619 /**
1620  * Complete flow rule creation with a drop queue.
1621  *
1622  * @param priv
1623  *   Pointer to private structure.
1624  * @param parser
1625  *   Internal parser structure.
1626  * @param flow
1627  *   Pointer to the rte_flow.
1628  * @param[out] error
1629  *   Perform verbose error reporting if not NULL.
1630  *
1631  * @return
1632  *   0 on success, errno value on failure.
1633  */
1634 static int
1635 priv_flow_create_action_queue_drop(struct priv *priv,
1636                                    struct mlx5_flow_parse *parser,
1637                                    struct rte_flow *flow,
1638                                    struct rte_flow_error *error)
1639 {
1640         struct ibv_flow_spec_action_drop *drop;
1641         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1642         int err = 0;
1643
1644         assert(priv->pd);
1645         assert(priv->ctx);
1646         flow->drop = 1;
1647         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1648                         parser->queue[HASH_RXQ_ETH].offset);
1649         *drop = (struct ibv_flow_spec_action_drop){
1650                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1651                         .size = size,
1652         };
1653         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1654         parser->queue[HASH_RXQ_ETH].offset += size;
1655         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1656                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1657         if (parser->count)
1658                 flow->cs = parser->cs;
1659         if (!priv->dev->data->dev_started)
1660                 return 0;
1661         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1662         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1663                 ibv_create_flow(priv->flow_drop_queue->qp,
1664                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1665         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1666                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1667                                    NULL, "flow rule creation failure");
1668                 err = ENOMEM;
1669                 goto error;
1670         }
1671         return 0;
1672 error:
1673         assert(flow);
1674         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1675                 claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
1676                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1677         }
1678         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1679                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1680                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1681         }
1682         if (flow->cs) {
1683                 claim_zero(ibv_destroy_counter_set(flow->cs));
1684                 flow->cs = NULL;
1685                 parser->cs = NULL;
1686         }
1687         return err;
1688 }
1689
1690 /**
1691  * Create hash Rx queues when RSS is enabled.
1692  *
1693  * @param priv
1694  *   Pointer to private structure.
1695  * @param parser
1696  *   Internal parser structure.
1697  * @param flow
1698  *   Pointer to the rte_flow.
1699  * @param[out] error
1700  *   Perform verbose error reporting if not NULL.
1701  *
1702  * @return
1703  *   0 on success, an errno value otherwise and rte_errno is set.
1704  */
1705 static int
1706 priv_flow_create_action_queue_rss(struct priv *priv,
1707                                   struct mlx5_flow_parse *parser,
1708                                   struct rte_flow *flow,
1709                                   struct rte_flow_error *error)
1710 {
1711         unsigned int i;
1712
1713         for (i = 0; i != hash_rxq_init_n; ++i) {
1714                 uint64_t hash_fields;
1715
1716                 if (!parser->queue[i].ibv_attr)
1717                         continue;
1718                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1719                 parser->queue[i].ibv_attr = NULL;
1720                 hash_fields = hash_rxq_init[i].hash_fields;
1721                 if (!priv->dev->data->dev_started)
1722                         continue;
1723                 flow->frxq[i].hrxq =
1724                         mlx5_priv_hrxq_get(priv,
1725                                            parser->rss_conf.rss_key,
1726                                            parser->rss_conf.rss_key_len,
1727                                            hash_fields,
1728                                            parser->queues,
1729                                            parser->queues_n);
1730                 if (flow->frxq[i].hrxq)
1731                         continue;
1732                 flow->frxq[i].hrxq =
1733                         mlx5_priv_hrxq_new(priv,
1734                                            parser->rss_conf.rss_key,
1735                                            parser->rss_conf.rss_key_len,
1736                                            hash_fields,
1737                                            parser->queues,
1738                                            parser->queues_n);
1739                 if (!flow->frxq[i].hrxq) {
1740                         rte_flow_error_set(error, ENOMEM,
1741                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1742                                            NULL, "cannot create hash rxq");
1743                         return ENOMEM;
1744                 }
1745         }
1746         return 0;
1747 }
1748
1749 /**
1750  * Complete flow rule creation.
1751  *
1752  * @param priv
1753  *   Pointer to private structure.
1754  * @param parser
1755  *   Internal parser structure.
1756  * @param flow
1757  *   Pointer to the rte_flow.
1758  * @param[out] error
1759  *   Perform verbose error reporting if not NULL.
1760  *
1761  * @return
1762  *   0 on success, an errno value otherwise and rte_errno is set.
1763  */
1764 static int
1765 priv_flow_create_action_queue(struct priv *priv,
1766                               struct mlx5_flow_parse *parser,
1767                               struct rte_flow *flow,
1768                               struct rte_flow_error *error)
1769 {
1770         int err = 0;
1771         unsigned int i;
1772
1773         assert(priv->pd);
1774         assert(priv->ctx);
1775         assert(!parser->drop);
1776         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1777         if (err)
1778                 goto error;
1779         if (parser->count)
1780                 flow->cs = parser->cs;
1781         if (!priv->dev->data->dev_started)
1782                 return 0;
1783         for (i = 0; i != hash_rxq_init_n; ++i) {
1784                 if (!flow->frxq[i].hrxq)
1785                         continue;
1786                 flow->frxq[i].ibv_flow =
1787                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1788                                         flow->frxq[i].ibv_attr);
1789                 if (!flow->frxq[i].ibv_flow) {
1790                         rte_flow_error_set(error, ENOMEM,
1791                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1792                                            NULL, "flow rule creation failure");
1793                         err = ENOMEM;
1794                         goto error;
1795                 }
1796                 DEBUG("%p type %d QP %p ibv_flow %p",
1797                       (void *)flow, i,
1798                       (void *)flow->frxq[i].hrxq,
1799                       (void *)flow->frxq[i].ibv_flow);
1800         }
1801         for (i = 0; i != parser->queues_n; ++i) {
1802                 struct mlx5_rxq_data *q =
1803                         (*priv->rxqs)[parser->queues[i]];
1804
1805                 q->mark |= parser->mark;
1806         }
1807         return 0;
1808 error:
1809         assert(flow);
1810         for (i = 0; i != hash_rxq_init_n; ++i) {
1811                 if (flow->frxq[i].ibv_flow) {
1812                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1813
1814                         claim_zero(ibv_destroy_flow(ibv_flow));
1815                 }
1816                 if (flow->frxq[i].hrxq)
1817                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1818                 if (flow->frxq[i].ibv_attr)
1819                         rte_free(flow->frxq[i].ibv_attr);
1820         }
1821         if (flow->cs) {
1822                 claim_zero(ibv_destroy_counter_set(flow->cs));
1823                 flow->cs = NULL;
1824                 parser->cs = NULL;
1825         }
1826         return err;
1827 }
1828
1829 /**
1830  * Convert a flow.
1831  *
1832  * @param priv
1833  *   Pointer to private structure.
1834  * @param list
1835  *   Pointer to a TAILQ flow list.
1836  * @param[in] attr
1837  *   Flow rule attributes.
1838  * @param[in] pattern
1839  *   Pattern specification (list terminated by the END pattern item).
1840  * @param[in] actions
1841  *   Associated actions (list terminated by the END action).
1842  * @param[out] error
1843  *   Perform verbose error reporting if not NULL.
1844  *
1845  * @return
1846  *   A flow on success, NULL otherwise.
1847  */
1848 static struct rte_flow *
1849 priv_flow_create(struct priv *priv,
1850                  struct mlx5_flows *list,
1851                  const struct rte_flow_attr *attr,
1852                  const struct rte_flow_item items[],
1853                  const struct rte_flow_action actions[],
1854                  struct rte_flow_error *error)
1855 {
1856         struct mlx5_flow_parse parser = { .create = 1, };
1857         struct rte_flow *flow = NULL;
1858         unsigned int i;
1859         int err;
1860
1861         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1862         if (err)
1863                 goto exit;
1864         flow = rte_calloc(__func__, 1,
1865                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1866                           0);
1867         if (!flow) {
1868                 rte_flow_error_set(error, ENOMEM,
1869                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1870                                    NULL,
1871                                    "cannot allocate flow memory");
1872                 return NULL;
1873         }
1874         /* Copy queues configuration. */
1875         flow->queues = (uint16_t (*)[])(flow + 1);
1876         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1877         flow->queues_n = parser.queues_n;
1878         flow->mark = parser.mark;
1879         /* Copy RSS configuration. */
1880         flow->rss_conf = parser.rss_conf;
1881         flow->rss_conf.rss_key = flow->rss_key;
1882         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1883         /* Finalize the flow. */
1884         if (parser.drop)
1885                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1886                                                          error);
1887         else
1888                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1889         if (err)
1890                 goto exit;
1891         TAILQ_INSERT_TAIL(list, flow, next);
1892         DEBUG("Flow created %p", (void *)flow);
1893         return flow;
1894 exit:
1895         for (i = 0; i != hash_rxq_init_n; ++i) {
1896                 if (parser.queue[i].ibv_attr)
1897                         rte_free(parser.queue[i].ibv_attr);
1898         }
1899         rte_free(flow);
1900         return NULL;
1901 }
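
/*
 * Illustrative sketch (editorial, application side): a complete rule as
 * it reaches priv_flow_create() through rte_flow_create(), steering UDP
 * over IPv4 traffic to Rx queue 3; "port_id" is assumed to be a
 * configured mlx5 port:
 *
 *     struct rte_flow_attr attr = { .ingress = 1 };
 *     struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *     struct rte_flow_action_queue queue = { .index = 3 };
 *     struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 *     struct rte_flow_error error;
 *     struct rte_flow *flow;
 *
 *     flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 */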
1902
1903 /**
1904  * Validate a flow supported by the NIC.
1905  *
1906  * @see rte_flow_validate()
1907  * @see rte_flow_ops
1908  */
1909 int
1910 mlx5_flow_validate(struct rte_eth_dev *dev,
1911                    const struct rte_flow_attr *attr,
1912                    const struct rte_flow_item items[],
1913                    const struct rte_flow_action actions[],
1914                    struct rte_flow_error *error)
1915 {
1916         struct priv *priv = dev->data->dev_private;
1917         int ret;
1918         struct mlx5_flow_parse parser = { .create = 0, };
1919
1920         priv_lock(priv);
1921         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1922         priv_unlock(priv);
1923         return ret;
1924 }
1925
1926 /**
1927  * Create a flow.
1928  *
1929  * @see rte_flow_create()
1930  * @see rte_flow_ops
1931  */
1932 struct rte_flow *
1933 mlx5_flow_create(struct rte_eth_dev *dev,
1934                  const struct rte_flow_attr *attr,
1935                  const struct rte_flow_item items[],
1936                  const struct rte_flow_action actions[],
1937                  struct rte_flow_error *error)
1938 {
1939         struct priv *priv = dev->data->dev_private;
1940         struct rte_flow *flow;
1941
1942         priv_lock(priv);
1943         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1944                                 error);
1945         priv_unlock(priv);
1946         return flow;
1947 }
1948
1949 /**
1950  * Destroy a flow.
1951  *
1952  * @param priv
1953  *   Pointer to private structure.
1954  * @param list
1955  *   Pointer to a TAILQ flow list.
1956  * @param[in] flow
1957  *   Flow to destroy.
1958  */
1959 static void
1960 priv_flow_destroy(struct priv *priv,
1961                   struct mlx5_flows *list,
1962                   struct rte_flow *flow)
1963 {
1964         unsigned int i;
1965
1966         if (flow->drop || !flow->mark)
1967                 goto free;
1968         for (i = 0; i != flow->queues_n; ++i) {
1969                 struct rte_flow *tmp;
1970                 int mark = 0;
1971
1972                 /*
1973                  * To remove the mark from the queue, the queue must not be
1974                  * present in any other marked flow (RSS or not).
1975                  */
1976                 TAILQ_FOREACH(tmp, list, next) {
1977                         unsigned int j;
1978                         uint16_t *tqs = NULL;
1979                         uint16_t tq_n = 0;
1980
1981                         if (!tmp->mark)
1982                                 continue;
1983                         for (j = 0; j != hash_rxq_init_n; ++j) {
1984                                 if (!tmp->frxq[j].hrxq)
1985                                         continue;
1986                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1987                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1988                         }
1989                         if (!tq_n)
1990                                 continue;
1991                         for (j = 0; (j != tq_n) && !mark; j++)
1992                                 if (tqs[j] == (*flow->queues)[i])
1993                                         mark = 1;
1994                 }
1995                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1996         }
1997 free:
1998         if (flow->drop) {
1999                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2000                         claim_zero(ibv_destroy_flow
2001                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2002                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2003         } else {
2004                 for (i = 0; i != hash_rxq_init_n; ++i) {
2005                         struct mlx5_flow *frxq = &flow->frxq[i];
2006
2007                         if (frxq->ibv_flow)
2008                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2009                         if (frxq->hrxq)
2010                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2011                         if (frxq->ibv_attr)
2012                                 rte_free(frxq->ibv_attr);
2013                 }
2014         }
2015         if (flow->cs) {
2016                 claim_zero(ibv_destroy_counter_set(flow->cs));
2017                 flow->cs = NULL;
2018         }
2019         TAILQ_REMOVE(list, flow, next);
2020         DEBUG("Flow destroyed %p", (void *)flow);
2021         rte_free(flow);
2022 }
2023
2024 /**
2025  * Destroy all flows.
2026  *
2027  * @param priv
2028  *   Pointer to private structure.
2029  * @param list
2030  *   Pointer to a TAILQ flow list.
2031  */
2032 void
2033 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2034 {
2035         while (!TAILQ_EMPTY(list)) {
2036                 struct rte_flow *flow;
2037
2038                 flow = TAILQ_FIRST(list);
2039                 priv_flow_destroy(priv, list, flow);
2040         }
2041 }
2042
2043 /**
2044  * Create drop queue.
2045  *
2046  * @param priv
2047  *   Pointer to private structure.
2048  *
2049  * @return
2050  *   0 on success, -1 on failure.
2051  */
2052 int
2053 priv_flow_create_drop_queue(struct priv *priv)
2054 {
2055         struct mlx5_hrxq_drop *fdq = NULL;
2056
2057         assert(priv->pd);
2058         assert(priv->ctx);
2059         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2060         if (!fdq) {
2061                 WARN("cannot allocate memory for drop queue");
2062                 goto error;
2063         }
2064         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2065         if (!fdq->cq) {
2066                 WARN("cannot allocate CQ for drop queue");
2067                 goto error;
2068         }
2069         fdq->wq = ibv_create_wq(priv->ctx,
2070                         &(struct ibv_wq_init_attr){
2071                         .wq_type = IBV_WQT_RQ,
2072                         .max_wr = 1,
2073                         .max_sge = 1,
2074                         .pd = priv->pd,
2075                         .cq = fdq->cq,
2076                         });
2077         if (!fdq->wq) {
2078                 WARN("cannot allocate WQ for drop queue");
2079                 goto error;
2080         }
2081         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2082                         &(struct ibv_rwq_ind_table_init_attr){
2083                         .log_ind_tbl_size = 0,
2084                         .ind_tbl = &fdq->wq,
2085                         .comp_mask = 0,
2086                         });
2087         if (!fdq->ind_table) {
2088                 WARN("cannot allocate indirection table for drop queue");
2089                 goto error;
2090         }
2091         fdq->qp = ibv_create_qp_ex(priv->ctx,
2092                 &(struct ibv_qp_init_attr_ex){
2093                         .qp_type = IBV_QPT_RAW_PACKET,
2094                         .comp_mask =
2095                                 IBV_QP_INIT_ATTR_PD |
2096                                 IBV_QP_INIT_ATTR_IND_TABLE |
2097                                 IBV_QP_INIT_ATTR_RX_HASH,
2098                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2099                                 .rx_hash_function =
2100                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2101                                 .rx_hash_key_len = rss_hash_default_key_len,
2102                                 .rx_hash_key = rss_hash_default_key,
2103                                 .rx_hash_fields_mask = 0,
2104                                 },
2105                         .rwq_ind_tbl = fdq->ind_table,
2106                         .pd = priv->pd
2107                 });
2108         if (!fdq->qp) {
2109                 WARN("cannot allocate QP for drop queue");
2110                 goto error;
2111         }
2112         priv->flow_drop_queue = fdq;
2113         return 0;
2114 error:
2115         if (fdq && fdq->qp)
2116                 claim_zero(ibv_destroy_qp(fdq->qp));
2117         if (fdq && fdq->ind_table)
2118                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2119         if (fdq && fdq->wq)
2120                 claim_zero(ibv_destroy_wq(fdq->wq));
2121         if (fdq && fdq->cq)
2122                 claim_zero(ibv_destroy_cq(fdq->cq));
2123         if (fdq)
2124                 rte_free(fdq);
2125         priv->flow_drop_queue = NULL;
2126         return -1;
2127 }
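
/*
 * Editorial note: the drop queue is a chain of Verbs objects
 * (CQ -> WQ -> indirection table -> RSS QP) whose single work queue is
 * never given receive buffers, so flows attached to its QP silently
 * discard matching packets.
 */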
2128
2129 /**
2130  * Delete drop queue.
2131  *
2132  * @param priv
2133  *   Pointer to private structure.
2134  */
2135 void
2136 priv_flow_delete_drop_queue(struct priv *priv)
2137 {
2138         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2139
2140         if (!fdq)
2141                 return;
2142         if (fdq->qp)
2143                 claim_zero(ibv_destroy_qp(fdq->qp));
2144         if (fdq->ind_table)
2145                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2146         if (fdq->wq)
2147                 claim_zero(ibv_destroy_wq(fdq->wq));
2148         if (fdq->cq)
2149                 claim_zero(ibv_destroy_cq(fdq->cq));
2150         rte_free(fdq);
2151         priv->flow_drop_queue = NULL;
2152 }
2153
2154 /**
2155  * Remove all flows.
2156  *
2157  * @param priv
2158  *   Pointer to private structure.
2159  * @param list
2160  *   Pointer to a TAILQ flow list.
2161  */
2162 void
2163 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2164 {
2165         struct rte_flow *flow;
2166
2167         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2168                 unsigned int i;
2169
2170                 if (flow->drop) {
2171                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2172                                 continue;
2173                         claim_zero(ibv_destroy_flow
2174                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2175                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2176                         /* Next flow. */
2177                         continue;
2178                 }
2179                 if (flow->mark) {
2180                         struct mlx5_ind_table_ibv *ind_tbl = NULL;
2181
2182                         for (i = 0; i != hash_rxq_init_n; ++i) {
2183                                 if (!flow->frxq[i].hrxq)
2184                                         continue;
2185                                 ind_tbl = flow->frxq[i].hrxq->ind_table;
2186                         }
2187                         assert(ind_tbl);
2188                         for (i = 0; i != ind_tbl->queues_n; ++i)
2189                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2190                 }
2191                 for (i = 0; i != hash_rxq_init_n; ++i) {
2192                         if (!flow->frxq[i].ibv_flow)
2193                                 continue;
2194                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2195                         flow->frxq[i].ibv_flow = NULL;
2196                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2197                         flow->frxq[i].hrxq = NULL;
2198                 }
2199                 DEBUG("Flow %p removed", (void *)flow);
2200         }
2201 }
2202
2203 /**
2204  * Add all flows.
2205  *
2206  * @param priv
2207  *   Pointer to private structure.
2208  * @param list
2209  *   Pointer to a TAILQ flow list.
2210  *
2211  * @return
2212  *   0 on success, an errno value otherwise and rte_errno is set.
2213  */
2214 int
2215 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2216 {
2217         struct rte_flow *flow;
2218
2219         TAILQ_FOREACH(flow, list, next) {
2220                 unsigned int i;
2221
2222                 if (flow->drop) {
2223                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2224                                 ibv_create_flow
2225                                 (priv->flow_drop_queue->qp,
2226                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2227                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2228                                 DEBUG("Flow %p cannot be applied",
2229                                       (void *)flow);
2230                                 rte_errno = EINVAL;
2231                                 return rte_errno;
2232                         }
2233                         DEBUG("Flow %p applied", (void *)flow);
2234                         /* Next flow. */
2235                         continue;
2236                 }
2237                 for (i = 0; i != hash_rxq_init_n; ++i) {
2238                         if (!flow->frxq[i].ibv_attr)
2239                                 continue;
2240                         flow->frxq[i].hrxq =
2241                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2242                                                    flow->rss_conf.rss_key_len,
2243                                                    hash_rxq_init[i].hash_fields,
2244                                                    (*flow->queues),
2245                                                    flow->queues_n);
2246                         if (flow->frxq[i].hrxq)
2247                                 goto flow_create;
2248                         flow->frxq[i].hrxq =
2249                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2250                                                    flow->rss_conf.rss_key_len,
2251                                                    hash_rxq_init[i].hash_fields,
2252                                                    (*flow->queues),
2253                                                    flow->queues_n);
2254                         if (!flow->frxq[i].hrxq) {
2255                                 DEBUG("Flow %p cannot be applied",
2256                                       (void *)flow);
2257                                 rte_errno = EINVAL;
2258                                 return rte_errno;
2259                         }
2260 flow_create:
2261                         flow->frxq[i].ibv_flow =
2262                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2263                                                 flow->frxq[i].ibv_attr);
2264                         if (!flow->frxq[i].ibv_flow) {
2265                                 DEBUG("Flow %p cannot be applied",
2266                                       (void *)flow);
2267                                 rte_errno = EINVAL;
2268                                 return rte_errno;
2269                         }
2270                         DEBUG("Flow %p applied", (void *)flow);
2271                 }
2272                 if (!flow->mark)
2273                         continue;
2274                 for (i = 0; i != flow->queues_n; ++i)
2275                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2276         }
2277         return 0;
2278 }
2279
2280 /**
2281  * Verify the flow list is empty.
2282  *
2283  * @param priv
2284  *   Pointer to private structure.
2285  *
2286  * @return The number of flows not released.
2287  */
2288 int
2289 priv_flow_verify(struct priv *priv)
2290 {
2291         struct rte_flow *flow;
2292         int ret = 0;
2293
2294         TAILQ_FOREACH(flow, &priv->flows, next) {
2295                 DEBUG("%p: flow %p still referenced", (void *)priv,
2296                       (void *)flow);
2297                 ++ret;
2298         }
2299         return ret;
2300 }
2301
2302 /**
2303  * Enable a control flow configured from the control plane.
2304  *
2305  * @param dev
2306  *   Pointer to Ethernet device.
2307  * @param eth_spec
2308  *   An Ethernet flow spec to apply.
2309  * @param eth_mask
2310  *   An Ethernet flow mask to apply.
2311  * @param vlan_spec
2312  *   A VLAN flow spec to apply.
2313  * @param vlan_mask
2314  *   A VLAN flow mask to apply.
2315  *
2316  * @return
2317  *   0 on success, an errno value on failure.
2318  */
2319 int
2320 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2321                     struct rte_flow_item_eth *eth_spec,
2322                     struct rte_flow_item_eth *eth_mask,
2323                     struct rte_flow_item_vlan *vlan_spec,
2324                     struct rte_flow_item_vlan *vlan_mask)
2325 {
2326         struct priv *priv = dev->data->dev_private;
2327         const struct rte_flow_attr attr = {
2328                 .ingress = 1,
2329                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2330         };
2331         struct rte_flow_item items[] = {
2332                 {
2333                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2334                         .spec = eth_spec,
2335                         .last = NULL,
2336                         .mask = eth_mask,
2337                 },
2338                 {
2339                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2340                                 RTE_FLOW_ITEM_TYPE_END,
2341                         .spec = vlan_spec,
2342                         .last = NULL,
2343                         .mask = vlan_mask,
2344                 },
2345                 {
2346                         .type = RTE_FLOW_ITEM_TYPE_END,
2347                 },
2348         };
2349         struct rte_flow_action actions[] = {
2350                 {
2351                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2352                 },
2353                 {
2354                         .type = RTE_FLOW_ACTION_TYPE_END,
2355                 },
2356         };
2357         struct rte_flow *flow;
2358         struct rte_flow_error error;
2359         unsigned int i;
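        /*
         * Editorial note: "local" mirrors the layout of struct
         * rte_flow_action_rss so that storage for its flexible queue[]
         * array can be reserved on the stack.
         */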
2360         union {
2361                 struct rte_flow_action_rss rss;
2362                 struct {
2363                         const struct rte_eth_rss_conf *rss_conf;
2364                         uint16_t num;
2365                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2366                 } local;
2367         } action_rss;
2368
2369         if (!priv->reta_idx_n)
2370                 return EINVAL;
2371         for (i = 0; i != priv->reta_idx_n; ++i)
2372                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2373         action_rss.local.rss_conf = &priv->rss_conf;
2374         action_rss.local.num = priv->reta_idx_n;
2375         actions[0].conf = (const void *)&action_rss.rss;
2376         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2377                                 &error);
2378         if (!flow)
2379                 return rte_errno;
2380         return 0;
2381 }
2382
2383 /**
2384  * Enable a control flow configured from the control plane.
2385  *
2386  * @param dev
2387  *   Pointer to Ethernet device.
2388  * @param eth_spec
2389  *   An Ethernet flow spec to apply.
2390  * @param eth_mask
2391  *   An Ethernet flow mask to apply.
2392  *
2393  * @return
2394  *   0 on success, an errno value on failure.
2395  */
2396 int
2397 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2398                struct rte_flow_item_eth *eth_spec,
2399                struct rte_flow_item_eth *eth_mask)
2400 {
2401         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2402 }
2403
2404 /**
2405  * Destroy a flow.
2406  *
2407  * @see rte_flow_destroy()
2408  * @see rte_flow_ops
2409  */
2410 int
2411 mlx5_flow_destroy(struct rte_eth_dev *dev,
2412                   struct rte_flow *flow,
2413                   struct rte_flow_error *error)
2414 {
2415         struct priv *priv = dev->data->dev_private;
2416
2417         (void)error;
2418         priv_lock(priv);
2419         priv_flow_destroy(priv, &priv->flows, flow);
2420         priv_unlock(priv);
2421         return 0;
2422 }
2423
2424 /**
2425  * Destroy all flows.
2426  *
2427  * @see rte_flow_flush()
2428  * @see rte_flow_ops
2429  */
2430 int
2431 mlx5_flow_flush(struct rte_eth_dev *dev,
2432                 struct rte_flow_error *error)
2433 {
2434         struct priv *priv = dev->data->dev_private;
2435
2436         (void)error;
2437         priv_lock(priv);
2438         priv_flow_flush(priv, &priv->flows);
2439         priv_unlock(priv);
2440         return 0;
2441 }
2442
2443 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2444 /**
2445  * Query flow counter.
2446  *
2447  * @param cs
2448  *   The counter set.
2449  * @param[out] query_count
2450  *   Returned data from the counter.
2451  *
2452  * @return
2453  *   0 on success, an errno value otherwise and rte_errno is set.
2454  */
2455 static int
2456 priv_flow_query_count(struct ibv_counter_set *cs,
2457                       struct mlx5_flow_counter_stats *counter_stats,
2458                       struct rte_flow_query_count *query_count,
2459                       struct rte_flow_error *error)
2460 {
2461         uint64_t counters[2];
2462         struct ibv_query_counter_set_attr query_cs_attr = {
2463                 .cs = cs,
2464                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2465         };
2466         struct ibv_counter_set_data query_out = {
2467                 .out = counters,
2468                 .outlen = 2 * sizeof(uint64_t),
2469         };
2470         int res = ibv_query_counter_set(&query_cs_attr, &query_out);
2471
2472         if (res) {
2473                 rte_flow_error_set(error, res,
2474                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2475                                    NULL,
2476                                    "cannot read counter");
2477                 return res;
2478         }
2479         query_count->hits_set = 1;
2480         query_count->bytes_set = 1;
2481         query_count->hits = counters[0] - counter_stats->hits;
2482         query_count->bytes = counters[1] - counter_stats->bytes;
2483         if (query_count->reset) {
2484                 counter_stats->hits = counters[0];
2485                 counter_stats->bytes = counters[1];
2486         }
2487         return 0;
2488 }
2489
2490 /**
2491  * Query a flow.
2492  *
2493  * @see rte_flow_query()
2494  * @see rte_flow_ops
2495  */
2496 int
2497 mlx5_flow_query(struct rte_eth_dev *dev,
2498                 struct rte_flow *flow,
2499                 enum rte_flow_action_type action __rte_unused,
2500                 void *data,
2501                 struct rte_flow_error *error)
2502 {
2503         struct priv *priv = dev->data->dev_private;
2504         int res = EINVAL;
2505
2506         priv_lock(priv);
2507         if (flow->cs) {
2508                 res = priv_flow_query_count(flow->cs,
2509                                         &flow->counter_stats,
2510                                         (struct rte_flow_query_count *)data,
2511                                         error);
2512         } else {
2513                 rte_flow_error_set(error, res,
2514                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2515                                    NULL,
2516                                    "no counter found for flow");
2517         }
2518         priv_unlock(priv);
2519         return -res;
2520 }
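
/*
 * Illustrative sketch (editorial, application side): querying the COUNT
 * action through the generic API; hits/bytes are deltas since the last
 * reset when the reset field is set:
 *
 *     struct rte_flow_query_count query = { .reset = 1 };
 *     struct rte_flow_error error;
 *
 *     if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                         &query, &error))
 *             printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                    query.hits, query.bytes);
 */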
2521 #endif
2522
2523 /**
2524  * Isolated mode.
2525  *
2526  * @see rte_flow_isolate()
2527  * @see rte_flow_ops
2528  */
2529 int
2530 mlx5_flow_isolate(struct rte_eth_dev *dev,
2531                   int enable,
2532                   struct rte_flow_error *error)
2533 {
2534         struct priv *priv = dev->data->dev_private;
2535
2536         priv_lock(priv);
2537         if (dev->data->dev_started) {
2538                 rte_flow_error_set(error, EBUSY,
2539                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2540                                    NULL,
2541                                    "port must be stopped first");
2542                 priv_unlock(priv);
2543                 return -rte_errno;
2544         }
2545         priv->isolated = !!enable;
2546         if (enable)
2547                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2548         else
2549                 priv->dev->dev_ops = &mlx5_dev_ops;
2550         priv_unlock(priv);
2551         return 0;
2552 }
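
/*
 * Illustrative sketch (editorial, application side): isolated mode must
 * be selected while the port is stopped, as enforced above:
 *
 *     struct rte_flow_error error;
 *
 *     rte_eth_dev_stop(port_id);
 *     if (rte_flow_isolate(port_id, 1, &error))
 *             printf("cannot enter isolated mode: %s\n", error.message);
 *     rte_eth_dev_start(port_id);
 */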
2553
2554 /**
2555  * Convert a flow director filter to a generic flow.
2556  *
2557  * @param priv
2558  *   Private structure.
2559  * @param fdir_filter
2560  *   Flow director filter to add.
2561  * @param attributes
2562  *   Generic flow parameters structure.
2563  *
2564  * @return
2565  *   0 on success, errno value on error.
2566  */
2567 static int
2568 priv_fdir_filter_convert(struct priv *priv,
2569                          const struct rte_eth_fdir_filter *fdir_filter,
2570                          struct mlx5_fdir *attributes)
2571 {
2572         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2573
2574         /* Validate queue number. */
2575         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2576                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2577                 return EINVAL;
2578         }
2579         attributes->attr.ingress = 1;
2580         attributes->items[0] = (struct rte_flow_item) {
2581                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2582                 .spec = &attributes->l2,
2583                 .mask = &attributes->l2_mask,
2584         };
2585         switch (fdir_filter->action.behavior) {
2586         case RTE_ETH_FDIR_ACCEPT:
2587                 attributes->actions[0] = (struct rte_flow_action){
2588                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2589                         .conf = &attributes->queue,
2590                 };
2591                 break;
2592         case RTE_ETH_FDIR_REJECT:
2593                 attributes->actions[0] = (struct rte_flow_action){
2594                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2595                 };
2596                 break;
2597         default:
2598                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2599                 return ENOTSUP;
2600         }
2601         attributes->queue.index = fdir_filter->action.rx_queue;
2602         switch (fdir_filter->input.flow_type) {
2603         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2604                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2605                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2606                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2607                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2608                         .type_of_service = input->flow.udp4_flow.ip.tos,
2609                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2610                 };
2611                 attributes->l4.udp.hdr = (struct udp_hdr){
2612                         .src_port = input->flow.udp4_flow.src_port,
2613                         .dst_port = input->flow.udp4_flow.dst_port,
2614                 };
2615                 attributes->items[1] = (struct rte_flow_item){
2616                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2617                         .spec = &attributes->l3,
2618                 };
2619                 attributes->items[2] = (struct rte_flow_item){
2620                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2621                         .spec = &attributes->l4,
2622                 };
2623                 break;
2624         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2625                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2626                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2627                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2628                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2629                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2630                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2631                 };
2632                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2633                         .src_port = input->flow.tcp4_flow.src_port,
2634                         .dst_port = input->flow.tcp4_flow.dst_port,
2635                 };
2636                 attributes->items[1] = (struct rte_flow_item){
2637                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2638                         .spec = &attributes->l3,
2639                 };
2640                 attributes->items[2] = (struct rte_flow_item){
2641                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2642                         .spec = &attributes->l4,
2643                 };
2644                 break;
2645         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2646                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2647                         .src_addr = input->flow.ip4_flow.src_ip,
2648                         .dst_addr = input->flow.ip4_flow.dst_ip,
2649                         .time_to_live = input->flow.ip4_flow.ttl,
2650                         .type_of_service = input->flow.ip4_flow.tos,
2651                         .next_proto_id = input->flow.ip4_flow.proto,
2652                 };
2653                 attributes->items[1] = (struct rte_flow_item){
2654                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2655                         .spec = &attributes->l3,
2656                 };
2657                 break;
2658         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2659                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2660                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2661                         .proto = input->flow.udp6_flow.ip.proto,
2662                 };
2663                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2664                        input->flow.udp6_flow.ip.src_ip,
2665                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2666                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2667                        input->flow.udp6_flow.ip.dst_ip,
2666                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2669                 attributes->l4.udp.hdr = (struct udp_hdr){
2670                         .src_port = input->flow.udp6_flow.src_port,
2671                         .dst_port = input->flow.udp6_flow.dst_port,
2672                 };
2673                 attributes->items[1] = (struct rte_flow_item){
2674                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2675                         .spec = &attributes->l3,
2676                 };
2677                 attributes->items[2] = (struct rte_flow_item){
2678                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2679                         .spec = &attributes->l4,
2680                 };
2681                 break;
2682         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2683                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2684                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2685                         .proto = input->flow.tcp6_flow.ip.proto,
2686                 };
2687                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2688                        input->flow.tcp6_flow.ip.src_ip,
2689                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2690                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2691                        input->flow.tcp6_flow.ip.dst_ip,
2690                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2693                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2694                         .src_port = input->flow.tcp6_flow.src_port,
2695                         .dst_port = input->flow.tcp6_flow.dst_port,
2696                 };
2697                 attributes->items[1] = (struct rte_flow_item){
2698                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2699                         .spec = &attributes->l3,
2700                 };
2701                 attributes->items[2] = (struct rte_flow_item){
2702                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2703                         .spec = &attributes->l4,
2704                 };
2705                 break;
2706         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2707                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2708                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2709                         .proto = input->flow.ipv6_flow.proto,
2710                 };
2711                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2712                        input->flow.ipv6_flow.src_ip,
2713                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2714                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2715                        input->flow.ipv6_flow.dst_ip,
2714                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2717                 attributes->items[1] = (struct rte_flow_item){
2718                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2719                         .spec = &attributes->l3,
2720                 };
2721                 break;
2722         default:
2723                 ERROR("invalid flow type%d",
2724                       fdir_filter->input.flow_type);
2725                 return ENOTSUP;
2726         }
2727         return 0;
2728 }
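
/*
 * For instance, an RTE_ETH_FLOW_NONFRAG_IPV6_UDP filter is converted
 * into the item chain IPV6 -> UDP stored in items[1] and items[2],
 * both pointing at the header copies kept in the mlx5_fdir structure
 * itself, so the result stays self-contained for the parsing step.
 */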

/**
 * Add a new flow director filter and store it in the list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

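	/*
	 * Convert the FDIR filter to generic rte_flow attributes, then run
	 * the regular flow parser on them for validation before actually
	 * creating the flow.
	 */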
	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}

/**
 * Delete a specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only appended to the
	 * specifications when a flow is actually created. Append it here
	 * as well, otherwise flows created with a drop action could never
	 * match in the comparison below.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
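	/*
	 * Look for an existing flow whose Verbs attributes and
	 * specification list both match the ones just generated: compare
	 * the ibv_flow_attr structures first, then walk the two
	 * specification lists entry by entry.
	 */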
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	/* Free the Verbs attributes allocated by priv_flow_convert(). */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return ret;
}

/**
 * Update a specific filter by deleting and re-creating it.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

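	/*
	 * Only the mode and the global masks are reported; flexible
	 * payload is not supported, so the remaining capability fields
	 * are left at zero.
	 */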
	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->guarant_spc = 0;
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
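
/*
 * Usage sketch (illustrative only): applications reach this entry point
 * through the generic filter API, e.g.:
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.action.rx_queue = 1,
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *	};
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				      RTE_ETH_FILTER_ADD, &filter);
 *
 * The port must have been configured with fdir_conf.mode set to
 * RTE_FDIR_MODE_PERFECT or RTE_FDIR_MODE_PERFECT_MAC_VLAN beforehand,
 * otherwise priv_fdir_ctrl_func() rejects the operation with EINVAL.
 */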