net/mlx5: handle RSS hash configuration in RSS flow
[dpdk.git] drivers/net/mlx5/mlx5_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35 #include <string.h>
36
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51
52 #include "mlx5.h"
53 #include "mlx5_prm.h"
54
55 /* Define minimal priority for control plane flows. */
56 #define MLX5_CTRL_FLOW_PRIORITY 4
57
58 /* Internet Protocol versions. */
59 #define MLX5_IPV4 4
60 #define MLX5_IPV6 6
61
62 static int
63 mlx5_flow_create_eth(const struct rte_flow_item *item,
64                      const void *default_mask,
65                      void *data);
66
67 static int
68 mlx5_flow_create_vlan(const struct rte_flow_item *item,
69                       const void *default_mask,
70                       void *data);
71
72 static int
73 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
74                       const void *default_mask,
75                       void *data);
76
77 static int
78 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
79                       const void *default_mask,
80                       void *data);
81
82 static int
83 mlx5_flow_create_udp(const struct rte_flow_item *item,
84                      const void *default_mask,
85                      void *data);
86
87 static int
88 mlx5_flow_create_tcp(const struct rte_flow_item *item,
89                      const void *default_mask,
90                      void *data);
91
92 static int
93 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
94                        const void *default_mask,
95                        void *data);
96
97 struct mlx5_flow_parse;
98
99 static void
100 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
101                       unsigned int size);
102
103 static int
104 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
105
106 /* Hash RX queue types. */
107 enum hash_rxq_type {
108         HASH_RXQ_TCPV4,
109         HASH_RXQ_UDPV4,
110         HASH_RXQ_IPV4,
111         HASH_RXQ_TCPV6,
112         HASH_RXQ_UDPV6,
113         HASH_RXQ_IPV6,
114         HASH_RXQ_ETH,
115 };
116
117 /* Initialization data for hash RX queue. */
118 struct hash_rxq_init {
119         uint64_t hash_fields; /* Fields that participate in the hash. */
120         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
121         unsigned int flow_priority; /* Flow priority to use. */
122         unsigned int ip_version; /* Internet protocol. */
123 };
124
125 /* Initialization data for hash RX queues. */
126 const struct hash_rxq_init hash_rxq_init[] = {
127         [HASH_RXQ_TCPV4] = {
128                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
129                                 IBV_RX_HASH_DST_IPV4 |
130                                 IBV_RX_HASH_SRC_PORT_TCP |
131                                 IBV_RX_HASH_DST_PORT_TCP),
132                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
133                 .flow_priority = 0,
134                 .ip_version = MLX5_IPV4,
135         },
136         [HASH_RXQ_UDPV4] = {
137                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
138                                 IBV_RX_HASH_DST_IPV4 |
139                                 IBV_RX_HASH_SRC_PORT_UDP |
140                                 IBV_RX_HASH_DST_PORT_UDP),
141                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
142                 .flow_priority = 0,
143                 .ip_version = MLX5_IPV4,
144         },
145         [HASH_RXQ_IPV4] = {
146                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
147                                 IBV_RX_HASH_DST_IPV4),
148                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
149                                 ETH_RSS_FRAG_IPV4),
150                 .flow_priority = 1,
151                 .ip_version = MLX5_IPV4,
152         },
153         [HASH_RXQ_TCPV6] = {
154                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
155                                 IBV_RX_HASH_DST_IPV6 |
156                                 IBV_RX_HASH_SRC_PORT_TCP |
157                                 IBV_RX_HASH_DST_PORT_TCP),
158                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
159                 .flow_priority = 0,
160                 .ip_version = MLX5_IPV6,
161         },
162         [HASH_RXQ_UDPV6] = {
163                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
164                                 IBV_RX_HASH_DST_IPV6 |
165                                 IBV_RX_HASH_SRC_PORT_UDP |
166                                 IBV_RX_HASH_DST_PORT_UDP),
167                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
168                 .flow_priority = 0,
169                 .ip_version = MLX5_IPV6,
170         },
171         [HASH_RXQ_IPV6] = {
172                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
173                                 IBV_RX_HASH_DST_IPV6),
174                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
175                                 ETH_RSS_FRAG_IPV6),
176                 .flow_priority = 1,
177                 .ip_version = MLX5_IPV6,
178         },
179         [HASH_RXQ_ETH] = {
180                 .hash_fields = 0,
181                 .dpdk_rss_hf = 0,
182                 .flow_priority = 2,
183         },
184 };
185
186 /* Number of entries in hash_rxq_init[]. */
187 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
188
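/*
 * Illustrative note (a sketch, not part of the original file): a DPDK RSS
 * hash field configuration selects entries of hash_rxq_init[] by
 * intersecting rss_hf with dpdk_rss_hf. More specific layers use a lower
 * numeric flow_priority (L4: 0, L3: 1, L2: 2) so that they take
 * precedence in the NIC.
 *
 *     uint64_t rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_IPV4;
 *     unsigned int i;
 *
 *     for (i = 0; i != hash_rxq_init_n; ++i)
 *             if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
 *                     printf("hash RX queue type %u enabled\n", i);
 *
 * Here HASH_RXQ_TCPV4 and HASH_RXQ_IPV4 match.
 */
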
189 /** Structure for Drop queue. */
190 struct mlx5_hrxq_drop {
191         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
192         struct ibv_qp *qp; /**< Verbs queue pair. */
193         struct ibv_wq *wq; /**< Verbs work queue. */
194         struct ibv_cq *cq; /**< Verbs completion queue. */
195 };
196
197 /* Flow structures. */
198 struct mlx5_flow {
199         uint64_t hash_fields; /**< Fields that participate in the hash. */
200         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
201         struct ibv_flow *ibv_flow; /**< Verbs flow. */
202         struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
203 };
204
205 /* Drop flow structures. */
206 struct mlx5_flow_drop {
207         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
208         struct ibv_flow *ibv_flow; /**< Verbs flow. */
209 };
210
211 struct rte_flow {
212         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
213         uint32_t mark:1; /**< Set if the flow is marked. */
214         uint32_t drop:1; /**< Drop queue. */
215         uint16_t queues_n; /**< Number of entries in queues[]. */
216         uint16_t (*queues)[]; /**< Queue indexes to use. */
217         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
218         uint8_t rss_key[40]; /**< Copy of the RSS key. */
219         union {
220                 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
221                 /**< Flow with Rx queue. */
222                 struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
223         };
224 };
225
226 /** Static initializer for items. */
227 #define ITEMS(...) \
228         (const enum rte_flow_item_type []){ \
229                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
230         }
231
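/*
 * For reference, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the END-terminated compound literal:
 *
 *     (const enum rte_flow_item_type []){
 *             RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *             RTE_FLOW_ITEM_TYPE_END,
 *     }
 */
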
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234         /** List of possible actions for these items. */
235         const enum rte_flow_action_type *const actions;
236         /** Bit-masks corresponding to the possibilities for the item. */
237         const void *mask;
238         /**
239          * Default bit-masks to use when item->mask is not provided. When
240          * \default_mask is also NULL, the full supported bit-mask (\mask) is
241          * used instead.
242          */
243         const void *default_mask;
244         /** Bit-masks size in bytes. */
245         const unsigned int mask_sz;
246         /**
247          * Conversion function from rte_flow to NIC specific flow.
248          *
249          * @param item
250          *   rte_flow item to convert.
251          * @param default_mask
252          *   Default bit-masks to use when item->mask is not provided.
253          * @param data
254          *   Internal structure to store the conversion.
255          *
256          * @return
257          *   0 on success, negative value otherwise.
258          */
259         int (*convert)(const struct rte_flow_item *item,
260                        const void *default_mask,
261                        void *data);
262         /** Size in bytes of the destination structure. */
263         const unsigned int dst_sz;
264         /** List of possible following items. */
265         const enum rte_flow_item_type *const items;
266 };
267
268 /** Valid actions for this PMD. */
269 static const enum rte_flow_action_type valid_actions[] = {
270         RTE_FLOW_ACTION_TYPE_DROP,
271         RTE_FLOW_ACTION_TYPE_QUEUE,
272         RTE_FLOW_ACTION_TYPE_MARK,
273         RTE_FLOW_ACTION_TYPE_FLAG,
274         RTE_FLOW_ACTION_TYPE_END,
275 };
276
277 /** Graph of supported items and associated actions. */
278 static const struct mlx5_flow_items mlx5_flow_items[] = {
279         [RTE_FLOW_ITEM_TYPE_END] = {
280                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
281                                RTE_FLOW_ITEM_TYPE_VXLAN),
282         },
283         [RTE_FLOW_ITEM_TYPE_ETH] = {
284                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
285                                RTE_FLOW_ITEM_TYPE_IPV4,
286                                RTE_FLOW_ITEM_TYPE_IPV6),
287                 .actions = valid_actions,
288                 .mask = &(const struct rte_flow_item_eth){
289                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
290                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
291                         .type = -1,
292                 },
293                 .default_mask = &rte_flow_item_eth_mask,
294                 .mask_sz = sizeof(struct rte_flow_item_eth),
295                 .convert = mlx5_flow_create_eth,
296                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
297         },
298         [RTE_FLOW_ITEM_TYPE_VLAN] = {
299                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
300                                RTE_FLOW_ITEM_TYPE_IPV6),
301                 .actions = valid_actions,
302                 .mask = &(const struct rte_flow_item_vlan){
303                         .tci = -1,
304                 },
305                 .default_mask = &rte_flow_item_vlan_mask,
306                 .mask_sz = sizeof(struct rte_flow_item_vlan),
307                 .convert = mlx5_flow_create_vlan,
308                 .dst_sz = 0,
309         },
310         [RTE_FLOW_ITEM_TYPE_IPV4] = {
311                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
312                                RTE_FLOW_ITEM_TYPE_TCP),
313                 .actions = valid_actions,
314                 .mask = &(const struct rte_flow_item_ipv4){
315                         .hdr = {
316                                 .src_addr = -1,
317                                 .dst_addr = -1,
318                                 .type_of_service = -1,
319                                 .next_proto_id = -1,
320                         },
321                 },
322                 .default_mask = &rte_flow_item_ipv4_mask,
323                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
324                 .convert = mlx5_flow_create_ipv4,
325                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
326         },
327         [RTE_FLOW_ITEM_TYPE_IPV6] = {
328                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
329                                RTE_FLOW_ITEM_TYPE_TCP),
330                 .actions = valid_actions,
331                 .mask = &(const struct rte_flow_item_ipv6){
332                         .hdr = {
333                                 .src_addr = {
334                                         0xff, 0xff, 0xff, 0xff,
335                                         0xff, 0xff, 0xff, 0xff,
336                                         0xff, 0xff, 0xff, 0xff,
337                                         0xff, 0xff, 0xff, 0xff,
338                                 },
339                                 .dst_addr = {
340                                         0xff, 0xff, 0xff, 0xff,
341                                         0xff, 0xff, 0xff, 0xff,
342                                         0xff, 0xff, 0xff, 0xff,
343                                         0xff, 0xff, 0xff, 0xff,
344                                 },
345                                 .vtc_flow = -1,
346                                 .proto = -1,
347                                 .hop_limits = -1,
348                         },
349                 },
350                 .default_mask = &rte_flow_item_ipv6_mask,
351                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
352                 .convert = mlx5_flow_create_ipv6,
353                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
354         },
355         [RTE_FLOW_ITEM_TYPE_UDP] = {
356                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
357                 .actions = valid_actions,
358                 .mask = &(const struct rte_flow_item_udp){
359                         .hdr = {
360                                 .src_port = -1,
361                                 .dst_port = -1,
362                         },
363                 },
364                 .default_mask = &rte_flow_item_udp_mask,
365                 .mask_sz = sizeof(struct rte_flow_item_udp),
366                 .convert = mlx5_flow_create_udp,
367                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
368         },
369         [RTE_FLOW_ITEM_TYPE_TCP] = {
370                 .actions = valid_actions,
371                 .mask = &(const struct rte_flow_item_tcp){
372                         .hdr = {
373                                 .src_port = -1,
374                                 .dst_port = -1,
375                         },
376                 },
377                 .default_mask = &rte_flow_item_tcp_mask,
378                 .mask_sz = sizeof(struct rte_flow_item_tcp),
379                 .convert = mlx5_flow_create_tcp,
380                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
381         },
382         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
383                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
384                 .actions = valid_actions,
385                 .mask = &(const struct rte_flow_item_vxlan){
386                         .vni = "\xff\xff\xff",
387                 },
388                 .default_mask = &rte_flow_item_vxlan_mask,
389                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
390                 .convert = mlx5_flow_create_vxlan,
391                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
392         },
393 };
394
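/*
 * The graph above is walked one item at a time during validation: each
 * pattern item must appear in the .items list of its predecessor. For
 * instance the pattern
 *
 *     eth / ipv4 / udp / vxlan / eth / ipv4 / tcp
 *
 * is reachable (END -> ETH -> IPV4 -> UDP -> VXLAN -> ETH -> IPV4 -> TCP),
 * whereas eth / tcp is not, since TCP is only listed under IPV4 and IPV6.
 */
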
395 /** Structure to pass to the conversion function. */
396 struct mlx5_flow_parse {
397         uint32_t inner; /**< Set once VXLAN is encountered. */
398         uint32_t create:1;
399         /**< Whether resources should remain after a validate. */
400         uint32_t drop:1; /**< Target is a drop queue. */
401         uint32_t mark:1; /**< Mark is present in the flow. */
402         uint32_t mark_id; /**< Mark identifier. */
403         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
404         uint16_t queues_n; /**< Number of entries in queues[]. */
405         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
406         uint8_t rss_key[40]; /**< Copy of the RSS key. */
407         enum hash_rxq_type layer; /**< Last pattern layer detected. */
408         union {
409                 struct {
410                         struct ibv_flow_attr *ibv_attr;
411                         /**< Pointer to Verbs attributes. */
412                         unsigned int offset;
413                         /**< Current position or total size of the attribute. */
414                 } queue[RTE_DIM(hash_rxq_init)];
415                 struct {
416                         struct ibv_flow_attr *ibv_attr;
417                         /**< Pointer to Verbs attributes. */
418                         unsigned int offset;
419                         /**< Current position or total size of the attribute. */
420                 } drop_q;
421         };
422 };
423
424 static const struct rte_flow_ops mlx5_flow_ops = {
425         .validate = mlx5_flow_validate,
426         .create = mlx5_flow_create,
427         .destroy = mlx5_flow_destroy,
428         .flush = mlx5_flow_flush,
429         .query = NULL,
430         .isolate = mlx5_flow_isolate,
431 };
432
433 /**
434  * Manage filter operations.
435  *
436  * @param dev
437  *   Pointer to Ethernet device structure.
438  * @param filter_type
439  *   Filter type.
440  * @param filter_op
441  *   Operation to perform.
442  * @param arg
443  *   Pointer to operation-specific structure.
444  *
445  * @return
446  *   0 on success, negative errno value on failure.
447  */
448 int
449 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
450                      enum rte_filter_type filter_type,
451                      enum rte_filter_op filter_op,
452                      void *arg)
453 {
454         int ret = EINVAL;
455
456         if (filter_type == RTE_ETH_FILTER_GENERIC) {
457                 if (filter_op != RTE_ETH_FILTER_GET)
458                         return -EINVAL;
459                 *(const void **)arg = &mlx5_flow_ops;
460                 return 0;
461         }
462         ERROR("%p: filter type (%d) not supported",
463               (void *)dev, filter_type);
464         return -ret;
465 }
466
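/*
 * Application-side sketch (illustrative, assuming a configured port): the
 * generic flow API retrieves &mlx5_flow_ops through this callback.
 *
 *     const struct rte_flow_ops *ops;
 *     int ret;
 *
 *     ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                                   RTE_ETH_FILTER_GET, &ops);
 *     if (!ret)
 *             assert(ops == &mlx5_flow_ops);
 */
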
467 /**
468  * Check support for a given item.
469  *
470  * @param[in] item
471  *   Item specification.
472  * @param[in] mask
473  *   Bit-masks covering supported fields to compare with spec, last and mask in
474  *   \item.
475  * @param size
476  *   Bit-mask size in bytes.
477  *
478  * @return
479  *   0 on success, nonzero value otherwise.
480  */
481 static int
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483                         const uint8_t *mask, unsigned int size)
484 {
485         int ret = 0;
486
487         if (!item->spec && (item->mask || item->last))
488                 return -1;
489         if (item->spec && !item->mask) {
490                 unsigned int i;
491                 const uint8_t *spec = item->spec;
492
493                 for (i = 0; i < size; ++i)
494                         if ((spec[i] | mask[i]) != mask[i])
495                                 return -1;
496         }
497         if (item->last && !item->mask) {
498                 unsigned int i;
499                 const uint8_t *spec = item->last;
500
501                 for (i = 0; i < size; ++i)
502                         if ((spec[i] | mask[i]) != mask[i])
503                                 return -1;
504         }
505         if (item->mask) {
506                 unsigned int i;
507                 const uint8_t *spec = item->mask;
508
509                 for (i = 0; i < size; ++i)
510                         if ((spec[i] | mask[i]) != mask[i])
511                                 return -1;
512         }
513         if (item->spec && item->last) {
514                 uint8_t spec[size];
515                 uint8_t last[size];
516                 const uint8_t *apply = mask;
517                 unsigned int i;
518
519                 if (item->mask)
520                         apply = item->mask;
521                 for (i = 0; i < size; ++i) {
522                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
523                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
524                 }
525                 ret = memcmp(spec, last, size);
526         }
527         return ret;
528 }
529
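/*
 * Example of the rejection logic above, assuming a one-byte field whose
 * supported mask is 0x0f: a spec byte of 0x1f fails because
 * (0x1f | 0x0f) != 0x0f, i.e. the rule asks to match bits the device
 * cannot filter on. Likewise, when both spec and last are provided they
 * must be identical once the applied mask is taken into account, as range
 * matching is not supported here.
 */
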
530 /**
531  * Copy the RSS configuration from the one provided by the user.
532  *
533  * @param priv
534  *   Pointer to private structure.
535  * @param parser
536  *   Internal parser structure.
537  * @param rss_conf
538  *   User RSS configuration to save.
539  *
540  * @return
541  *   0 on success, errno value on failure.
542  */
543 static int
544 priv_flow_convert_rss_conf(struct priv *priv,
545                            struct mlx5_flow_parse *parser,
546                            const struct rte_eth_rss_conf *rss_conf)
547 {
548         const struct rte_eth_rss_conf *rss =
549                 rss_conf ? rss_conf : &priv->rss_conf;
550
551         if (rss->rss_key_len > 40)
552                 return EINVAL;
553         parser->rss_conf.rss_key_len = rss->rss_key_len;
554         parser->rss_conf.rss_hf = rss->rss_hf;
555         memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
556         parser->rss_conf.rss_key = parser->rss_key;
557         return 0;
558 }
559
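/*
 * Illustrative user configuration accepted by the function above (key
 * bytes are arbitrary); the key length is capped at the 40 bytes this PMD
 * supports, and a NULL rss_conf argument falls back to the device-wide
 * configuration.
 *
 *     static uint8_t rss_key[40] = { 0x2c, 0xc6, 0x81, 0xd1, };
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = rss_key,
 *             .rss_key_len = sizeof(rss_key),
 *             .rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
 *     };
 */
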
560 /**
561  * Validate flow rule attributes.
562  *
563  * @param priv
564  *   Pointer to private structure.
565  * @param[in] attr
566  *   Flow rule attributes.
567  * @param[out] error
568  *   Perform verbose error reporting if not NULL.
569  * @param[in, out] parser
570  *   Internal parser structure.
571  *
572  * @return
573  *   0 on success, a negative errno value otherwise and rte_errno is set.
574  */
575 static int
576 priv_flow_convert_attributes(struct priv *priv,
577                              const struct rte_flow_attr *attr,
578                              struct rte_flow_error *error,
579                              struct mlx5_flow_parse *parser)
580 {
581         (void)priv;
582         (void)parser;
583         if (attr->group) {
584                 rte_flow_error_set(error, ENOTSUP,
585                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
586                                    NULL,
587                                    "groups are not supported");
588                 return -rte_errno;
589         }
590         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
591                 rte_flow_error_set(error, ENOTSUP,
592                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
593                                    NULL,
594                                    "priorities are not supported");
595                 return -rte_errno;
596         }
597         if (attr->egress) {
598                 rte_flow_error_set(error, ENOTSUP,
599                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
600                                    NULL,
601                                    "egress is not supported");
602                 return -rte_errno;
603         }
604         if (!attr->ingress) {
605                 rte_flow_error_set(error, ENOTSUP,
606                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
607                                    NULL,
608                                    "only ingress is supported");
609                 return -rte_errno;
610         }
611         return 0;
612 }
613
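/*
 * Only the default group, the default (or control) priority and the
 * ingress direction pass the checks above, e.g.:
 *
 *     struct rte_flow_attr attr = { .ingress = 1 };
 */
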
614 /**
615  * Extract the actions into the parser.
616  *
617  * @param priv
618  *   Pointer to private structure.
619  * @param[in] actions
620  *   Associated actions (list terminated by the END action).
621  * @param[out] error
622  *   Perform verbose error reporting if not NULL.
623  * @param[in, out] parser
624  *   Internal parser structure.
625  *
626  * @return
627  *   0 on success, a negative errno value otherwise and rte_errno is set.
628  */
629 static int
630 priv_flow_convert_actions(struct priv *priv,
631                           const struct rte_flow_action actions[],
632                           struct rte_flow_error *error,
633                           struct mlx5_flow_parse *parser)
634 {
635         /*
636          * Add the default RSS configuration, necessary for Verbs to create a
637          * QP even if no RSS is requested.
638          */
639         priv_flow_convert_rss_conf(priv, parser,
640                                    (const struct rte_eth_rss_conf *)
641                                    &priv->rss_conf);
642         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
643                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
644                         continue;
645                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
646                         parser->drop = 1;
647                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
648                         const struct rte_flow_action_queue *queue =
649                                 (const struct rte_flow_action_queue *)
650                                 actions->conf;
651                         uint16_t n;
652                         uint16_t found = 0;
653
654                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
655                                 goto exit_action_not_supported;
656                         for (n = 0; n < parser->queues_n; ++n) {
657                                 if (parser->queues[n] == queue->index) {
658                                         found = 1;
659                                         break;
660                                 }
661                         }
662                         if (parser->queues_n > 1 && !found) {
663                                 rte_flow_error_set(error, ENOTSUP,
664                                            RTE_FLOW_ERROR_TYPE_ACTION,
665                                            actions,
666                                            "queue action not in RSS queues");
667                                 return -rte_errno;
668                         }
669                         if (!found) {
670                                 parser->queues_n = 1;
671                                 parser->queues[0] = queue->index;
672                         }
673                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
674                         const struct rte_flow_action_rss *rss =
675                                 (const struct rte_flow_action_rss *)
676                                 actions->conf;
677                         uint16_t n;
678
679                         if (!rss || !rss->num) {
680                                 rte_flow_error_set(error, EINVAL,
681                                                    RTE_FLOW_ERROR_TYPE_ACTION,
682                                                    actions,
683                                                    "no valid queues");
684                                 return -rte_errno;
685                         }
686                         if (parser->queues_n == 1) {
687                                 uint16_t found = 0;
688
689                                 assert(parser->queues_n);
690                                 for (n = 0; n < rss->num; ++n) {
691                                         if (parser->queues[0] ==
692                                             rss->queue[n]) {
693                                                 found = 1;
694                                                 break;
695                                         }
696                                 }
697                                 if (!found) {
698                                         rte_flow_error_set(error, ENOTSUP,
699                                                    RTE_FLOW_ERROR_TYPE_ACTION,
700                                                    actions,
701                                                    "queue action not in RSS"
702                                                    " queues");
703                                         return -rte_errno;
704                                 }
705                         }
706                         for (n = 0; n < rss->num; ++n) {
707                                 if (rss->queue[n] >= priv->rxqs_n) {
708                                         rte_flow_error_set(error, EINVAL,
709                                                    RTE_FLOW_ERROR_TYPE_ACTION,
710                                                    actions,
711                                                    "queue id > number of"
712                                                    " queues");
713                                         return -rte_errno;
714                                 }
715                         }
716                         for (n = 0; n < rss->num; ++n)
717                                 parser->queues[n] = rss->queue[n];
718                         parser->queues_n = rss->num;
719                         if (priv_flow_convert_rss_conf(priv, parser,
720                                                        rss->rss_conf)) {
721                                 rte_flow_error_set(error, EINVAL,
722                                                    RTE_FLOW_ERROR_TYPE_ACTION,
723                                                    actions,
724                                                    "wrong RSS configuration");
725                                 return -rte_errno;
726                         }
727                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
728                         const struct rte_flow_action_mark *mark =
729                                 (const struct rte_flow_action_mark *)
730                                 actions->conf;
731
732                         if (!mark) {
733                                 rte_flow_error_set(error, EINVAL,
734                                                    RTE_FLOW_ERROR_TYPE_ACTION,
735                                                    actions,
736                                                    "mark must be defined");
737                                 return -rte_errno;
738                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
739                                 rte_flow_error_set(error, ENOTSUP,
740                                                    RTE_FLOW_ERROR_TYPE_ACTION,
741                                                    actions,
742                                                    "mark must be between 0"
743                                                    " and 16777199");
744                                 return -rte_errno;
745                         }
746                         parser->mark = 1;
747                         parser->mark_id = mark->id;
748                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
749                         parser->mark = 1;
750                 } else {
751                         goto exit_action_not_supported;
752                 }
753         }
754         if (!parser->queues_n && !parser->drop) {
755                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
756                                    NULL, "no valid action");
757                 return -rte_errno;
758         }
759         return 0;
760 exit_action_not_supported:
761         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
762                            actions, "action not supported");
763         return -rte_errno;
764 }
765
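/*
 * Hedged sketch of an action list this parser accepts, assuming the
 * rte_flow_action_rss layout of this DPDK revision (rss_conf pointer,
 * num, flexible queue[] array). Leaving rss_conf NULL reuses the
 * device-wide RSS configuration, as handled by priv_flow_convert_rss_conf().
 *
 *     struct rte_flow_action_rss *rss =
 *             calloc(1, sizeof(*rss) + 2 * sizeof(uint16_t));
 *
 *     rss->num = 2;
 *     rss->queue[0] = 0;
 *     rss->queue[1] = 1;
 *
 *     struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = rss },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 */
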
766 /**
767  * Validate items.
768  *
769  * @param priv
770  *   Pointer to private structure.
771  * @param[in] items
772  *   Pattern specification (list terminated by the END pattern item).
773  * @param[out] error
774  *   Perform verbose error reporting if not NULL.
775  * @param[in, out] parser
776  *   Internal parser structure.
777  *
778  * @return
779  *   0 on success, a negative errno value otherwise and rte_errno is set.
780  */
781 static int
782 priv_flow_convert_items_validate(struct priv *priv,
783                                  const struct rte_flow_item items[],
784                                  struct rte_flow_error *error,
785                                  struct mlx5_flow_parse *parser)
786 {
787         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
788         unsigned int i;
789
790         (void)priv;
791         /* Initialise the offsets to start after the verbs attribute. */
792         if (parser->drop) {
793                 parser->drop_q.offset = sizeof(struct ibv_flow_attr);
794         } else {
795                 for (i = 0; i != hash_rxq_init_n; ++i)
796                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
797         }
798         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
799                 const struct mlx5_flow_items *token = NULL;
800                 unsigned int n;
801                 int err;
802
803                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
804                         continue;
805                 for (i = 0;
806                      cur_item->items &&
807                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
808                      ++i) {
809                         if (cur_item->items[i] == items->type) {
810                                 token = &mlx5_flow_items[items->type];
811                                 break;
812                         }
813                 }
814                 if (!token)
815                         goto exit_item_not_supported;
816                 cur_item = token;
817                 err = mlx5_flow_item_validate(items,
818                                               (const uint8_t *)cur_item->mask,
819                                               cur_item->mask_sz);
820                 if (err)
821                         goto exit_item_not_supported;
822                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
823                         if (parser->inner) {
824                                 rte_flow_error_set(error, ENOTSUP,
825                                                    RTE_FLOW_ERROR_TYPE_ITEM,
826                                                    items,
827                                                    "cannot recognize multiple"
828                                                    " VXLAN encapsulations");
829                                 return -rte_errno;
830                         }
831                         parser->inner = 1;
832                 }
833                 if (parser->drop) {
834                         parser->drop_q.offset += cur_item->dst_sz;
835                 } else if (parser->queues_n == 1) {
836                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
837                 } else {
838                         for (n = 0; n != hash_rxq_init_n; ++n)
839                                 parser->queue[n].offset += cur_item->dst_sz;
840                 }
841         }
842         if (parser->mark) {
843                 for (i = 0; i != hash_rxq_init_n; ++i)
844                         parser->queue[i].offset +=
845                                 sizeof(struct ibv_flow_spec_action_tag);
846         }
847         return 0;
848 exit_item_not_supported:
849         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
850                            items, "item not supported");
851         return -rte_errno;
852 }
853
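/*
 * The offsets computed above size the verbs attribute buffers before any
 * allocation happens. For a single-queue eth / ipv4 / tcp pattern:
 *
 *     offset = sizeof(struct ibv_flow_attr)
 *            + sizeof(struct ibv_flow_spec_eth)       (ETH item)
 *            + sizeof(struct ibv_flow_spec_ipv4_ext)  (IPV4 item)
 *            + sizeof(struct ibv_flow_spec_tcp_udp)   (TCP item)
 *
 * accumulated in parser->queue[HASH_RXQ_ETH].offset, plus one
 * struct ibv_flow_spec_action_tag when a MARK or FLAG action is present.
 */
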
854 /**
855  * Allocate memory space to store verbs flow attributes.
856  *
857  * @param priv
858  *   Pointer to private structure.
859  * @param[in] priority
860  *   Flow priority.
861  * @param[in] size
862  *   Amount of bytes to allocate.
863  * @param[out] error
864  *   Perform verbose error reporting if not NULL.
865  *
866  * @return
867  *   A verbs flow attribute on success, NULL otherwise.
868  */
869 static struct ibv_flow_attr*
870 priv_flow_convert_allocate(struct priv *priv,
871                            unsigned int priority,
872                            unsigned int size,
873                            struct rte_flow_error *error)
874 {
875         struct ibv_flow_attr *ibv_attr;
876
877         (void)priv;
878         ibv_attr = rte_calloc(__func__, 1, size, 0);
879         if (!ibv_attr) {
880                 rte_flow_error_set(error, ENOMEM,
881                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
882                                    NULL,
883                                    "cannot allocate verbs spec attributes.");
884                 return NULL;
885         }
886         ibv_attr->priority = priority;
887         return ibv_attr;
888 }
889
890 /**
891  * Finalise verbs flow attributes.
892  *
893  * @param priv
894  *   Pointer to private structure.
895  * @param[in, out] parser
896  *   Internal parser structure.
897  */
898 static void
899 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
900 {
901         const unsigned int ipv4 =
902                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
903         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
904         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
905         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
906         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
907         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
908         unsigned int i;
909
910         (void)priv;
911         if (parser->layer == HASH_RXQ_ETH) {
912                 goto fill;
913         } else {
914                 /*
915                  * This layer becomes useless as the pattern defines deeper
916                  * layers.
917                  */
918                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
919                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
920         }
921         /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
922         for (i = ohmin; i != (ohmax + 1); ++i) {
923                 if (!parser->queue[i].ibv_attr)
924                         continue;
925                 rte_free(parser->queue[i].ibv_attr);
926                 parser->queue[i].ibv_attr = NULL;
927         }
928         /* Remove impossible flows according to the RSS configuration. */
929         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
930             parser->rss_conf.rss_hf) {
931                 /* Remove any other flow. */
932                 for (i = hmin; i != (hmax + 1); ++i) {
933                         if ((i == parser->layer) ||
934                              (!parser->queue[i].ibv_attr))
935                                 continue;
936                         rte_free(parser->queue[i].ibv_attr);
937                         parser->queue[i].ibv_attr = NULL;
938                 }
939         } else  if (!parser->queue[ip].ibv_attr) {
940                 /* No RSS is possible with the current configuration. */
941                 parser->queues_n = 1;
942                 return;
943         }
944 fill:
945         /*
946          * Fill missing layers in verbs specifications, or compute the correct
947          * offset to allocate the memory space for the attributes and
948          * specifications.
949          */
950         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
951                 union {
952                         struct ibv_flow_spec_ipv4_ext ipv4;
953                         struct ibv_flow_spec_ipv6 ipv6;
954                         struct ibv_flow_spec_tcp_udp udp_tcp;
955                 } specs;
956                 void *dst;
957                 uint16_t size;
958
959                 if (i == parser->layer)
960                         continue;
961                 if (parser->layer == HASH_RXQ_ETH) {
962                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
963                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
964                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
965                                         .type = IBV_FLOW_SPEC_IPV4_EXT |
966                                                 parser->inner,
967                                         .size = size,
968                                 };
969                         } else {
970                                 size = sizeof(struct ibv_flow_spec_ipv6);
971                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
972                                         .type = IBV_FLOW_SPEC_IPV6 |
973                                                 parser->inner,
974                                         .size = size,
975                                 };
976                         }
977                         if (parser->queue[i].ibv_attr) {
978                                 dst = (void *)((uintptr_t)
979                                                parser->queue[i].ibv_attr +
980                                                parser->queue[i].offset);
981                                 memcpy(dst, &specs, size);
982                                 ++parser->queue[i].ibv_attr->num_of_specs;
983                         }
984                         parser->queue[i].offset += size;
985                 }
986                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
987                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
988                         size = sizeof(struct ibv_flow_spec_tcp_udp);
989                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
990                                 .type = ((i == HASH_RXQ_UDPV4 ||
991                                           i == HASH_RXQ_UDPV6) ?
992                                          IBV_FLOW_SPEC_UDP :
993                                          IBV_FLOW_SPEC_TCP) |
994                                         parser->inner,
995                                 .size = size,
996                         };
997                         if (parser->queue[i].ibv_attr) {
998                                 dst = (void *)((uintptr_t)
999                                                parser->queue[i].ibv_attr +
1000                                                parser->queue[i].offset);
1001                                 memcpy(dst, &specs, size);
1002                                 ++parser->queue[i].ibv_attr->num_of_specs;
1003                         }
1004                         parser->queue[i].offset += size;
1005                 }
1006         }
1007 }
1008
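/*
 * Worked example of the pruning above, for the pattern eth / ipv4 with
 * rss_hf = ETH_RSS_IP: the last layer is HASH_RXQ_IPV4, so the IPv6
 * variants and the now redundant HASH_RXQ_ETH attribute are released;
 * since ETH_RSS_IP covers the IPv4 hash fields, the deeper TCPV4/UDPV4
 * flows are dropped as well and only HASH_RXQ_IPV4 remains. Note that on
 * the first call (before allocation) every ibv_attr pointer is still
 * NULL, so only the offsets are adjusted; the second call, made for
 * multi-queue RSS flows, writes the actual specifications.
 */
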
1009 /**
1010  * Validate and convert a flow supported by the NIC.
1011  *
1012  * @param priv
1013  *   Pointer to private structure.
1014  * @param[in] attr
1015  *   Flow rule attributes.
1016  * @param[in] pattern
1017  *   Pattern specification (list terminated by the END pattern item).
1018  * @param[in] actions
1019  *   Associated actions (list terminated by the END action).
1020  * @param[out] error
1021  *   Perform verbose error reporting if not NULL.
1022  * @param[in, out] parser
1023  *   Internal parser structure.
1024  *
1025  * @return
1026  *   0 on success, a negative errno value otherwise and rte_errno is set.
1027  */
1028 static int
1029 priv_flow_convert(struct priv *priv,
1030                   const struct rte_flow_attr *attr,
1031                   const struct rte_flow_item items[],
1032                   const struct rte_flow_action actions[],
1033                   struct rte_flow_error *error,
1034                   struct mlx5_flow_parse *parser)
1035 {
1036         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1037         unsigned int i;
1038         int ret;
1039
1040         /* First step. Validate the attributes, items and actions. */
1041         *parser = (struct mlx5_flow_parse){
1042                 .create = parser->create,
1043                 .layer = HASH_RXQ_ETH,
1044                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1045         };
1046         ret = priv_flow_convert_attributes(priv, attr, error, parser);
1047         if (ret)
1048                 return ret;
1049         ret = priv_flow_convert_actions(priv, actions, error, parser);
1050         if (ret)
1051                 return ret;
1052         ret = priv_flow_convert_items_validate(priv, items, error, parser);
1053         if (ret)
1054                 return ret;
1055         priv_flow_convert_finalise(priv, parser);
1056         /*
1057          * Second step.
1058          * Allocate the memory space to store verbs specifications.
1059          */
1060         if (parser->drop) {
1061                 parser->drop_q.ibv_attr =
1062                         priv_flow_convert_allocate(priv, attr->priority,
1063                                                    parser->drop_q.offset,
1064                                                    error);
1065                 if (!parser->drop_q.ibv_attr)
1066                         return ENOMEM;
1067                 parser->drop_q.offset = sizeof(struct ibv_flow_attr);
1068         } else if (parser->queues_n == 1) {
1069                 unsigned int priority =
1070                         attr->priority +
1071                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1072                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1073
1074                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1075                         priv_flow_convert_allocate(priv, priority,
1076                                                    offset, error);
1077                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1078                         return ENOMEM;
1079                 parser->queue[HASH_RXQ_ETH].offset =
1080                         sizeof(struct ibv_flow_attr);
1081         } else {
1082                 for (i = 0; i != hash_rxq_init_n; ++i) {
1083                         unsigned int priority =
1084                                 attr->priority +
1085                                 hash_rxq_init[i].flow_priority;
1086                         unsigned int offset;
1087
1088                         if (!(parser->rss_conf.rss_hf &
1089                               hash_rxq_init[i].dpdk_rss_hf) &&
1090                             (i != HASH_RXQ_ETH))
1091                                 continue;
1092                         offset = parser->queue[i].offset;
1093                         parser->queue[i].ibv_attr =
1094                                 priv_flow_convert_allocate(priv, priority,
1095                                                            offset, error);
1096                         if (!parser->queue[i].ibv_attr)
1097                                 goto exit_enomem;
1098                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1099                 }
1100         }
1101         /* Third step. Parse the items and fill the specifications. */
1102         parser->inner = 0;
1103         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1104                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1105                         continue;
1106                 cur_item = &mlx5_flow_items[items->type];
1107                 ret = cur_item->convert(items,
1108                                         (cur_item->default_mask ?
1109                                          cur_item->default_mask :
1110                                          cur_item->mask),
1111                                         parser);
1112                 if (ret) {
1113                         rte_flow_error_set(error, ENOTSUP,
1114                                            RTE_FLOW_ERROR_TYPE_ITEM,
1115                                            items, "item not supported");
1116                         goto exit_free;
1117                 }
1118         }
1119         if (parser->mark)
1120                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1121         /*
1122          * Last step. Complete the missing specifications to match the RSS
1123          * configuration.
1124          */
1125         if (parser->queues_n > 1)
1126                 priv_flow_convert_finalise(priv, parser);
1127 exit_free:
1128         /* When only validation is requested, all resources must be released. */
1129         if (!parser->create) {
1130                 if (parser->drop) {
1131                         rte_free(parser->drop_q.ibv_attr);
1132                         parser->drop_q.ibv_attr = NULL;
1133                 }
1134                 for (i = 0; i != hash_rxq_init_n; ++i) {
1135                         if (parser->queue[i].ibv_attr) {
1136                                 rte_free(parser->queue[i].ibv_attr);
1137                                 parser->queue[i].ibv_attr = NULL;
1138                         }
1139                 }
1140         }
1141         return ret;
1142 exit_enomem:
1143         for (i = 0; i != hash_rxq_init_n; ++i) {
1144                 if (parser->queue[i].ibv_attr) {
1145                         rte_free(parser->queue[i].ibv_attr);
1146                         parser->queue[i].ibv_attr = NULL;
1147                 }
1148         }
1149         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1150                            NULL, "cannot allocate verbs spec attributes.");
1151         return ret;
1152 }
1153
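/*
 * Usage sketch: validation and creation share this routine and differ
 * only by parser.create; resources are kept solely when creating.
 *
 *     struct mlx5_flow_parse parser = { .create = 0 };
 *
 *     if (!priv_flow_convert(priv, attr, items, actions, error, &parser))
 *             (the flow is supported; nothing was left allocated)
 */
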
1154 /**
1155  * Copy the created specification into the flow.
1156  *
1157  * @param parser
1158  *   Internal parser structure.
1159  * @param src
1160  *   Created specification.
1161  * @param size
1162  *   Size in bytes of the specification to copy.
1163  */
1164 static void
1165 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1166                       unsigned int size)
1167 {
1168         unsigned int i;
1169         void *dst;
1170
1171         if (parser->drop) {
1172                 dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1173                                 parser->drop_q.offset);
1174                 memcpy(dst, src, size);
1175                 ++parser->drop_q.ibv_attr->num_of_specs;
1176                 parser->drop_q.offset += size;
1177                 return;
1178         }
1179         for (i = 0; i != hash_rxq_init_n; ++i) {
1180                 if (!parser->queue[i].ibv_attr)
1181                         continue;
1182                 /* Specification must be of the same L3 type or none. */
1183                 if (parser->layer == HASH_RXQ_ETH ||
1184                     (hash_rxq_init[parser->layer].ip_version ==
1185                      hash_rxq_init[i].ip_version) ||
1186                     (hash_rxq_init[i].ip_version == 0)) {
1187                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1188                                         parser->queue[i].offset);
1189                         memcpy(dst, src, size);
1190                         ++parser->queue[i].ibv_attr->num_of_specs;
1191                         parser->queue[i].offset += size;
1192                 }
1193         }
1194 }
1195
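/*
 * For example, an Ethernet specification built by mlx5_flow_create_eth()
 * below is duplicated by this helper into every hash RX queue attribute
 * still active (TCPV4, UDPV4, ... or only HASH_RXQ_ETH for a single
 * queue), bumping num_of_specs and the write offset of each one.
 */
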
1196 /**
1197  * Convert Ethernet item to Verbs specification.
1198  *
1199  * @param[in] item
1200  *   Item specification.
1201  * @param[in] default_mask
1202  *   Default bit-masks to use when item->mask is not provided.
1203  * @param[in, out] data
1204  *   User structure.
1205  */
1206 static int
1207 mlx5_flow_create_eth(const struct rte_flow_item *item,
1208                      const void *default_mask,
1209                      void *data)
1210 {
1211         const struct rte_flow_item_eth *spec = item->spec;
1212         const struct rte_flow_item_eth *mask = item->mask;
1213         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1214         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1215         struct ibv_flow_spec_eth eth = {
1216                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1217                 .size = eth_size,
1218         };
1219
1220         parser->layer = HASH_RXQ_ETH;
1221         if (spec) {
1222                 unsigned int i;
1223
1224                 if (!mask)
1225                         mask = default_mask;
1226                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1227                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1228                 eth.val.ether_type = spec->type;
1229                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1230                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1231                 eth.mask.ether_type = mask->type;
1232                 /* Remove unwanted bits from values. */
1233                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1234                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1235                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1236                 }
1237                 eth.val.ether_type &= eth.mask.ether_type;
1238         }
1239         mlx5_flow_create_copy(parser, &eth, eth_size);
1240         return 0;
1241 }
1242
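/*
 * Illustrative conversion, assuming the item below with no mask (the
 * default mask then applies):
 *
 *     spec = { .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55" }
 *
 * yields eth.val.dst_mac = 00:11:22:33:44:55 with eth.mask.dst_mac =
 * ff:ff:ff:ff:ff:ff taken from the default mask; the final loop clears
 * any value bits not covered by the mask.
 */
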
1243 /**
1244  * Convert VLAN item to Verbs specification.
1245  *
1246  * @param[in] item
1247  *   Item specification.
1248  * @param[in] default_mask
1249  *   Default bit-masks to use when item->mask is not provided.
1250  * @param[in, out] data
1251  *   User structure.
1252  */
1253 static int
1254 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1255                       const void *default_mask,
1256                       void *data)
1257 {
1258         const struct rte_flow_item_vlan *spec = item->spec;
1259         const struct rte_flow_item_vlan *mask = item->mask;
1260         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1261         struct ibv_flow_spec_eth *eth;
1262         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1263
1264         if (spec) {
1265                 unsigned int i;
1266                 if (!mask)
1267                         mask = default_mask;
1268
1269                 if (parser->drop) {
1270                         eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1271                                        parser->drop_q.offset - eth_size);
1272                         eth->val.vlan_tag = spec->tci;
1273                         eth->mask.vlan_tag = mask->tci;
1274                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1275                         return 0;
1276                 }
1277                 for (i = 0; i != hash_rxq_init_n; ++i) {
1278                         if (!parser->queue[i].ibv_attr)
1279                                 continue;
1280
1281                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1282                                        parser->queue[i].offset - eth_size);
1283                         eth->val.vlan_tag = spec->tci;
1284                         eth->mask.vlan_tag = mask->tci;
1285                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1286                 }
1287         }
1288         return 0;
1289 }
1290
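/*
 * Note that VLAN has no verbs specification of its own (its dst_sz is 0
 * in the item graph): as shown above, the TCI value and mask are patched
 * into the Ethernet specification that precedes it, found eth_size bytes
 * before the current write offset.
 */
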
1291 /**
1292  * Convert IPv4 item to Verbs specification.
1293  *
1294  * @param[in] item
1295  *   Item specification.
1296  * @param[in] default_mask
1297  *   Default bit-masks to use when item->mask is not provided.
1298  * @param[in, out] data
1299  *   User structure.
1300  */
1301 static int
1302 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1303                       const void *default_mask,
1304                       void *data)
1305 {
1306         const struct rte_flow_item_ipv4 *spec = item->spec;
1307         const struct rte_flow_item_ipv4 *mask = item->mask;
1308         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1309         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1310         struct ibv_flow_spec_ipv4_ext ipv4 = {
1311                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1312                 .size = ipv4_size,
1313         };
1314
1315         parser->layer = HASH_RXQ_IPV4;
1316         if (spec) {
1317                 if (!mask)
1318                         mask = default_mask;
1319                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1320                         .src_ip = spec->hdr.src_addr,
1321                         .dst_ip = spec->hdr.dst_addr,
1322                         .proto = spec->hdr.next_proto_id,
1323                         .tos = spec->hdr.type_of_service,
1324                 };
1325                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1326                         .src_ip = mask->hdr.src_addr,
1327                         .dst_ip = mask->hdr.dst_addr,
1328                         .proto = mask->hdr.next_proto_id,
1329                         .tos = mask->hdr.type_of_service,
1330                 };
1331                 /* Remove unwanted bits from values. */
1332                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1333                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1334                 ipv4.val.proto &= ipv4.mask.proto;
1335                 ipv4.val.tos &= ipv4.mask.tos;
1336         }
1337         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1338         return 0;
1339 }
1340
1341 /**
1342  * Convert IPv6 item to Verbs specification.
1343  *
1344  * @param[in] item
1345  *   Item specification.
1346  * @param[in] default_mask
1347  *   Default bit-masks to use when item->mask is not provided.
1348  * @param[in, out] data
1349  *   User structure.
1350  */
1351 static int
1352 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1353                       const void *default_mask,
1354                       void *data)
1355 {
1356         const struct rte_flow_item_ipv6 *spec = item->spec;
1357         const struct rte_flow_item_ipv6 *mask = item->mask;
1358         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1359         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1360         struct ibv_flow_spec_ipv6 ipv6 = {
1361                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1362                 .size = ipv6_size,
1363         };
1364
1365         parser->layer = HASH_RXQ_IPV6;
1366         if (spec) {
1367                 unsigned int i;
1368
1369                 if (!mask)
1370                         mask = default_mask;
1371                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1372                        RTE_DIM(ipv6.val.src_ip));
1373                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1374                        RTE_DIM(ipv6.val.dst_ip));
1375                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1376                        RTE_DIM(ipv6.mask.src_ip));
1377                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1378                        RTE_DIM(ipv6.mask.dst_ip));
1379                 ipv6.mask.flow_label = mask->hdr.vtc_flow;
1380                 ipv6.mask.next_hdr = mask->hdr.proto;
1381                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1382                 /* Remove unwanted bits from values. */
1383                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1384                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1385                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1386                 }
1387                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1388                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1389                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1390         }
1391         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1392         return 0;
1393 }
1394
1395 /**
1396  * Convert UDP item to Verbs specification.
1397  *
1398  * @param[in] item
1399  *   Item specification.
1400  * @param[in] default_mask
1401  *   Default bit-masks to use when item->mask is not provided.
1402  * @param[in, out] data
1403  *   User structure.
1404  */
1405 static int
1406 mlx5_flow_create_udp(const struct rte_flow_item *item,
1407                      const void *default_mask,
1408                      void *data)
1409 {
1410         const struct rte_flow_item_udp *spec = item->spec;
1411         const struct rte_flow_item_udp *mask = item->mask;
1412         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1413         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1414         struct ibv_flow_spec_tcp_udp udp = {
1415                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1416                 .size = udp_size,
1417         };
1418
1419         if (parser->layer == HASH_RXQ_IPV4)
1420                 parser->layer = HASH_RXQ_UDPV4;
1421         else
1422                 parser->layer = HASH_RXQ_UDPV6;
1423         if (spec) {
1424                 if (!mask)
1425                         mask = default_mask;
1426                 udp.val.dst_port = spec->hdr.dst_port;
1427                 udp.val.src_port = spec->hdr.src_port;
1428                 udp.mask.dst_port = mask->hdr.dst_port;
1429                 udp.mask.src_port = mask->hdr.src_port;
1430                 /* Remove unwanted bits from values. */
1431                 udp.val.src_port &= udp.mask.src_port;
1432                 udp.val.dst_port &= udp.mask.dst_port;
1433         }
1434         mlx5_flow_create_copy(parser, &udp, udp_size);
1435         return 0;
1436 }
1437
1438 /**
1439  * Convert TCP item to Verbs specification.
1440  *
1441  * @param[in] item
1442  *   Item specification.
1443  * @param[in] default_mask
1444  *   Default bit-masks to use when item->mask is not provided.
1445  * @param[in, out] data
1446  *   User structure.
1447  */
1448 static int
1449 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1450                      const void *default_mask,
1451                      void *data)
1452 {
1453         const struct rte_flow_item_tcp *spec = item->spec;
1454         const struct rte_flow_item_tcp *mask = item->mask;
1455         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1456         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1457         struct ibv_flow_spec_tcp_udp tcp = {
1458                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1459                 .size = tcp_size,
1460         };
1461
1462         if (parser->layer == HASH_RXQ_IPV4)
1463                 parser->layer = HASH_RXQ_TCPV4;
1464         else
1465                 parser->layer = HASH_RXQ_TCPV6;
1466         if (spec) {
1467                 if (!mask)
1468                         mask = default_mask;
1469                 tcp.val.dst_port = spec->hdr.dst_port;
1470                 tcp.val.src_port = spec->hdr.src_port;
1471                 tcp.mask.dst_port = mask->hdr.dst_port;
1472                 tcp.mask.src_port = mask->hdr.src_port;
1473                 /* Remove unwanted bits from values. */
1474                 tcp.val.src_port &= tcp.mask.src_port;
1475                 tcp.val.dst_port &= tcp.mask.dst_port;
1476         }
1477         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1478         return 0;
1479 }
1480
1481 /**
1482  * Convert VXLAN item to Verbs specification.
1483  *
1484  * @param[in] item
1485  *   Item specification.
1486  * @param[in] default_mask
1487  *   Default bit-masks to use when item->mask is not provided.
1488  * @param[in, out] data
1489  *   User structure.
1490  */
1491 static int
1492 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1493                        const void *default_mask,
1494                        void *data)
1495 {
1496         const struct rte_flow_item_vxlan *spec = item->spec;
1497         const struct rte_flow_item_vxlan *mask = item->mask;
1498         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1499         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1500         struct ibv_flow_spec_tunnel vxlan = {
1501                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1502                 .size = size,
1503         };
1504         union vni {
1505                 uint32_t vlan_id;
1506                 uint8_t vni[4];
1507         } id;
1508
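             /*
              * The VXLAN network identifier (VNI) is a 24-bit value; it is
              * copied into the three last bytes of the union so the first
              * byte of the resulting 32-bit tunnel_id stays zero.
              */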
1509         id.vni[0] = 0;
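             /* Pattern items that follow describe the inner packet. */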
1510         parser->inner = IBV_FLOW_SPEC_INNER;
1511         if (spec) {
1512                 if (!mask)
1513                         mask = default_mask;
1514                 memcpy(&id.vni[1], spec->vni, 3);
1515                 vxlan.val.tunnel_id = id.vlan_id;
1516                 memcpy(&id.vni[1], mask->vni, 3);
1517                 vxlan.mask.tunnel_id = id.vlan_id;
1518                 /* Remove unwanted bits from values. */
1519                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1520         }
1521         mlx5_flow_create_copy(parser, &vxlan, size);
1522         return 0;
1523 }
1524
1525 /**
1526  * Convert mark/flag action to Verbs specification.
1527  *
1528  * @param parser
1529  *   Internal parser structure.
1530  * @param mark_id
1531  *   Mark identifier.
1532  */
1533 static int
1534 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1535 {
1536         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1537         struct ibv_flow_spec_action_tag tag = {
1538                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1539                 .size = size,
1540                 .tag_id = mlx5_flow_mark_set(mark_id),
1541         };
1542
1543         assert(parser->mark);
1544         mlx5_flow_create_copy(parser, &tag, size);
1545         return 0;
1546 }
1547
1548 /**
1549  * Complete flow rule creation with a drop queue.
1550  *
1551  * @param priv
1552  *   Pointer to private structure.
1553  * @param parser
1554  *   Internal parser structure.
1555  * @param flow
1556  *   Pointer to the rte_flow.
1557  * @param[out] error
1558  *   Perform verbose error reporting if not NULL.
1559  *
1560  * @return
1561  *   0 on success, errno value on failure.
1562  */
1563 static int
1564 priv_flow_create_action_queue_drop(struct priv *priv,
1565                                    struct mlx5_flow_parse *parser,
1566                                    struct rte_flow *flow,
1567                                    struct rte_flow_error *error)
1568 {
1569         struct ibv_flow_spec_action_drop *drop;
1570         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1571         int err = 0;
1572
1573         assert(priv->pd);
1574         assert(priv->ctx);
1575         flow->drop = 1;
1576         drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1577                         parser->drop_q.offset);
1578         *drop = (struct ibv_flow_spec_action_drop){
1579                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1580                         .size = size,
1581         };
1582         ++parser->drop_q.ibv_attr->num_of_specs;
1583         parser->drop_q.offset += size;
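             /*
              * Hand the Verbs attributes over to the flow; clearing the
              * parser pointer prevents them from being freed twice.
              */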
1584         flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
1585         parser->drop_q.ibv_attr = NULL;
1586         if (!priv->dev->data->dev_started)
1587                 return 0;
1588         flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
1589                                               flow->drxq.ibv_attr);
1590         if (!flow->drxq.ibv_flow) {
1591                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1592                                    NULL, "flow rule creation failure");
1593                 err = ENOMEM;
1594                 goto error;
1595         }
1596         return 0;
1597 error:
1598         assert(flow);
1599         if (flow->drxq.ibv_flow) {
1600                 claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1601                 flow->drxq.ibv_flow = NULL;
1602         }
1603         if (flow->drxq.ibv_attr) {
1604                 rte_free(flow->drxq.ibv_attr);
1605                 flow->drxq.ibv_attr = NULL;
1606         }
1607         return err;
1608 }
1609
1610 /**
1611  * Create hash Rx queues when RSS is enabled.
1612  *
1613  * @param priv
1614  *   Pointer to private structure.
1615  * @param parser
1616  *   Internal parser structure.
1617  * @param flow
1618  *   Pointer to the rte_flow.
1619  * @param[out] error
1620  *   Perform verbose error reporting if not NULL.
1621  *
1622  * @return
1623  *   0 on success, an errno value otherwise and rte_errno is set.
1624  */
1625 static int
1626 priv_flow_create_action_queue_rss(struct priv *priv,
1627                                   struct mlx5_flow_parse *parser,
1628                                   struct rte_flow *flow,
1629                                   struct rte_flow_error *error)
1630 {
1631         unsigned int i;
1632
1633         for (i = 0; i != hash_rxq_init_n; ++i) {
1634                 uint64_t hash_fields;
1635
1636                 if (!parser->queue[i].ibv_attr)
1637                         continue;
1638                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1639                 parser->queue[i].ibv_attr = NULL;
1640                 hash_fields = hash_rxq_init[i].hash_fields;
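                     /*
                      * A layer without hash fields (plain Ethernet) cannot
                      * spread packets with RSS; use a single queue instead.
                      */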
1641                 flow->frxq[i].hrxq =
1642                         mlx5_priv_hrxq_get(priv,
1643                                            parser->rss_conf.rss_key,
1644                                            parser->rss_conf.rss_key_len,
1645                                            hash_fields,
1646                                            parser->queues,
1647                                            hash_fields ? parser->queues_n : 1);
1648                 if (flow->frxq[i].hrxq)
1649                         continue;
1650                 flow->frxq[i].hrxq =
1651                         mlx5_priv_hrxq_new(priv,
1652                                            parser->rss_conf.rss_key,
1653                                            parser->rss_conf.rss_key_len,
1654                                            hash_fields,
1655                                            parser->queues,
1656                                            hash_fields ? parser->queues_n : 1);
1657                 if (!flow->frxq[i].hrxq) {
1658                         rte_flow_error_set(error, ENOMEM,
1659                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1660                                            NULL, "cannot create hash rxq");
1661                         return ENOMEM;
1662                 }
1663         }
1664         return 0;
1665 }
1666
1667 /**
1668  * Complete flow rule creation.
1669  *
1670  * @param priv
1671  *   Pointer to private structure.
1672  * @param parser
1673  *   Internal parser structure.
1674  * @param flow
1675  *   Pointer to the rte_flow.
1676  * @param[out] error
1677  *   Perform verbose error reporting if not NULL.
1678  *
1679  * @return
1680  *   0 on success, an errno value otherwise and rte_errno is set.
1681  */
1682 static int
1683 priv_flow_create_action_queue(struct priv *priv,
1684                               struct mlx5_flow_parse *parser,
1685                               struct rte_flow *flow,
1686                               struct rte_flow_error *error)
1687 {
1688         int err = 0;
1689         unsigned int i;
1690
1691         assert(priv->pd);
1692         assert(priv->ctx);
1693         assert(!parser->drop);
1694         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1695         if (err)
1696                 goto error;
1697         if (!priv->dev->data->dev_started)
1698                 return 0;
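             /* Create a Verbs flow on the QP of each hash Rx queue. */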
1699         for (i = 0; i != hash_rxq_init_n; ++i) {
1700                 if (!flow->frxq[i].hrxq)
1701                         continue;
1702                 flow->frxq[i].ibv_flow =
1703                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1704                                         flow->frxq[i].ibv_attr);
1705                 if (!flow->frxq[i].ibv_flow) {
1706                         rte_flow_error_set(error, ENOMEM,
1707                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1708                                            NULL, "flow rule creation failure");
1709                         err = ENOMEM;
1710                         goto error;
1711                 }
1712                 DEBUG("%p type %d QP %p ibv_flow %p",
1713                       (void *)flow, i,
1714                       (void *)flow->frxq[i].hrxq,
1715                       (void *)flow->frxq[i].ibv_flow);
1716         }
1717         for (i = 0; i != parser->queues_n; ++i) {
1718                 struct mlx5_rxq_data *q =
1719                         (*priv->rxqs)[parser->queues[i]];
1720
1721                 q->mark |= parser->mark;
1722         }
1723         return 0;
1724 error:
1725         assert(flow);
1726         for (i = 0; i != hash_rxq_init_n; ++i) {
1727                 if (flow->frxq[i].ibv_flow) {
1728                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1729
1730                         claim_zero(ibv_destroy_flow(ibv_flow));
1731                 }
1732                 if (flow->frxq[i].hrxq)
1733                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1734                 if (flow->frxq[i].ibv_attr)
1735                         rte_free(flow->frxq[i].ibv_attr);
1736         }
1737         return err;
1738 }
1739
1740 /**
1741  * Convert a flow.
1742  *
1743  * @param priv
1744  *   Pointer to private structure.
1745  * @param list
1746  *   Pointer to a TAILQ flow list.
1747  * @param[in] attr
1748  *   Flow rule attributes.
1749  * @param[in] pattern
1750  *   Pattern specification (list terminated by the END pattern item).
1751  * @param[in] actions
1752  *   Associated actions (list terminated by the END action).
1753  * @param[out] error
1754  *   Perform verbose error reporting if not NULL.
1755  *
1756  * @return
1757  *   A flow on success, NULL otherwise.
1758  */
1759 static struct rte_flow *
1760 priv_flow_create(struct priv *priv,
1761                  struct mlx5_flows *list,
1762                  const struct rte_flow_attr *attr,
1763                  const struct rte_flow_item items[],
1764                  const struct rte_flow_action actions[],
1765                  struct rte_flow_error *error)
1766 {
1767         struct mlx5_flow_parse parser = { .create = 1, };
1768         struct rte_flow *flow = NULL;
1769         unsigned int i;
1770         int err;
1771
1772         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1773         if (err)
1774                 goto exit;
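             /* Allocate the flow with its queue array stored right after it. */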
1775         flow = rte_calloc(__func__, 1,
1776                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1777                           0);
1778         if (!flow) {
1779                 rte_flow_error_set(error, ENOMEM,
1780                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1781                                    NULL,
1782                                    "cannot allocate flow memory");
1783                 return NULL;
1784         }
1785         /* Copy queues configuration. */
1786         flow->queues = (uint16_t (*)[])(flow + 1);
1787         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1788         flow->queues_n = parser.queues_n;
1789         /* Copy RSS configuration. */
1790         flow->rss_conf = parser.rss_conf;
1791         flow->rss_conf.rss_key = flow->rss_key;
1792         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1793         /* Finalize the flow. */
1794         if (parser.drop)
1795                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1796                                                          error);
1797         else
1798                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1799         if (err)
1800                 goto exit;
1801         TAILQ_INSERT_TAIL(list, flow, next);
1802         DEBUG("Flow created %p", (void *)flow);
1803         return flow;
1804 exit:
1805         if (parser.drop) {
1806                 rte_free(parser.drop_q.ibv_attr);
1807         } else {
1808                 for (i = 0; i != hash_rxq_init_n; ++i) {
1809                         if (parser.queue[i].ibv_attr)
1810                                 rte_free(parser.queue[i].ibv_attr);
1811                 }
1812         }
1813         rte_free(flow);
1814         return NULL;
1815 }
1816
1817 /**
1818  * Validate a flow supported by the NIC.
1819  *
1820  * @see rte_flow_validate()
1821  * @see rte_flow_ops
1822  */
1823 int
1824 mlx5_flow_validate(struct rte_eth_dev *dev,
1825                    const struct rte_flow_attr *attr,
1826                    const struct rte_flow_item items[],
1827                    const struct rte_flow_action actions[],
1828                    struct rte_flow_error *error)
1829 {
1830         struct priv *priv = dev->data->dev_private;
1831         int ret;
1832         struct mlx5_flow_parse parser = { .create = 0, };
1833
1834         priv_lock(priv);
1835         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1836         priv_unlock(priv);
1837         return ret;
1838 }
1839
1840 /**
1841  * Create a flow.
1842  *
1843  * @see rte_flow_create()
1844  * @see rte_flow_ops
1845  */
1846 struct rte_flow *
1847 mlx5_flow_create(struct rte_eth_dev *dev,
1848                  const struct rte_flow_attr *attr,
1849                  const struct rte_flow_item items[],
1850                  const struct rte_flow_action actions[],
1851                  struct rte_flow_error *error)
1852 {
1853         struct priv *priv = dev->data->dev_private;
1854         struct rte_flow *flow;
1855
1856         priv_lock(priv);
1857         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1858                                 error);
1859         priv_unlock(priv);
1860         return flow;
1861 }
1862
1863 /**
1864  * Destroy a flow.
1865  *
1866  * @param priv
1867  *   Pointer to private structure.
1868  * @param list
1869  *   Pointer to a TAILQ flow list.
1870  * @param[in] flow
1871  *   Flow to destroy.
1872  */
1873 static void
1874 priv_flow_destroy(struct priv *priv,
1875                   struct mlx5_flows *list,
1876                   struct rte_flow *flow)
1877 {
1878         unsigned int i;
1879
1880         if (flow->drop || !flow->mark)
1881                 goto free;
1882         for (i = 0; i != flow->queues_n; ++i) {
1883                 struct rte_flow *tmp;
1884                 int mark = 0;
1885
1886                 /*
1887                  * To remove the mark from the queue, the queue must not be
1888                  * present in any other marked flow (RSS or not).
1889                  */
1890                 TAILQ_FOREACH(tmp, list, next) {
1891                         unsigned int j;
1892                         uint16_t *tqs = NULL;
1893                         uint16_t tq_n = 0;
1894
1895                         if (!tmp->mark)
1896                                 continue;
1897                         for (j = 0; j != hash_rxq_init_n; ++j) {
1898                                 if (!tmp->frxq[j].hrxq)
1899                                         continue;
1900                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1901                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1902                         }
1903                         if (!tq_n)
1904                                 continue;
1905                         for (j = 0; (j != tq_n) && !mark; j++)
1906                                 if (tqs[j] == (*flow->queues)[i])
1907                                         mark = 1;
1908                 }
1909                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1910         }
1911 free:
1912         if (flow->drop) {
1913                 if (flow->drxq.ibv_flow)
1914                         claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1915                 rte_free(flow->drxq.ibv_attr);
1916         } else {
1917                 for (i = 0; i != hash_rxq_init_n; ++i) {
1918                         struct mlx5_flow *frxq = &flow->frxq[i];
1919
1920                         if (frxq->ibv_flow)
1921                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
1922                         if (frxq->hrxq)
1923                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
1924                         if (frxq->ibv_attr)
1925                                 rte_free(frxq->ibv_attr);
1926                 }
1927         }
1928         TAILQ_REMOVE(list, flow, next);
1929         DEBUG("Flow destroyed %p", (void *)flow);
1930         rte_free(flow);
1931 }
1932
1933 /**
1934  * Destroy all flows.
1935  *
1936  * @param priv
1937  *   Pointer to private structure.
1938  * @param list
1939  *   Pointer to a TAILQ flow list.
1940  */
1941 void
1942 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
1943 {
1944         while (!TAILQ_EMPTY(list)) {
1945                 struct rte_flow *flow;
1946
1947                 flow = TAILQ_FIRST(list);
1948                 priv_flow_destroy(priv, list, flow);
1949         }
1950 }
1951
1952 /**
1953  * Create drop queue.
1954  *
1955  * @param priv
1956  *   Pointer to private structure.
1957  *
1958  * @return
1959  *   0 on success, -1 otherwise.
1960  */
1961 int
1962 priv_flow_create_drop_queue(struct priv *priv)
1963 {
1964         struct mlx5_hrxq_drop *fdq = NULL;
1965
1966         assert(priv->pd);
1967         assert(priv->ctx);
1968         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1969         if (!fdq) {
1970                 WARN("cannot allocate memory for drop queue");
1971                 goto error;
1972         }
1973         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
1974         if (!fdq->cq) {
1975                 WARN("cannot allocate CQ for drop queue");
1976                 goto error;
1977         }
1978         fdq->wq = ibv_create_wq(priv->ctx,
1979                         &(struct ibv_wq_init_attr){
1980                         .wq_type = IBV_WQT_RQ,
1981                         .max_wr = 1,
1982                         .max_sge = 1,
1983                         .pd = priv->pd,
1984                         .cq = fdq->cq,
1985                         });
1986         if (!fdq->wq) {
1987                 WARN("cannot allocate WQ for drop queue");
1988                 goto error;
1989         }
1990         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
1991                         &(struct ibv_rwq_ind_table_init_attr){
1992                         .log_ind_tbl_size = 0,
1993                         .ind_tbl = &fdq->wq,
1994                         .comp_mask = 0,
1995                         });
1996         if (!fdq->ind_table) {
1997                 WARN("cannot allocate indirection table for drop queue");
1998                 goto error;
1999         }
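             /*
              * The drop queue QP is a RAW_PACKET hash QP built on a
              * single-entry indirection table with an empty hash fields
              * mask.
              */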
2000         fdq->qp = ibv_create_qp_ex(priv->ctx,
2001                 &(struct ibv_qp_init_attr_ex){
2002                         .qp_type = IBV_QPT_RAW_PACKET,
2003                         .comp_mask =
2004                                 IBV_QP_INIT_ATTR_PD |
2005                                 IBV_QP_INIT_ATTR_IND_TABLE |
2006                                 IBV_QP_INIT_ATTR_RX_HASH,
2007                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2008                                 .rx_hash_function =
2009                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2010                                 .rx_hash_key_len = rss_hash_default_key_len,
2011                                 .rx_hash_key = rss_hash_default_key,
2012                                 .rx_hash_fields_mask = 0,
2013                                 },
2014                         .rwq_ind_tbl = fdq->ind_table,
2015                         .pd = priv->pd
2016                 });
2017         if (!fdq->qp) {
2018                 WARN("cannot allocate QP for drop queue");
2019                 goto error;
2020         }
2021         priv->flow_drop_queue = fdq;
2022         return 0;
2023 error:
2024         if (fdq && fdq->qp)
2025                 claim_zero(ibv_destroy_qp(fdq->qp));
2026         if (fdq && fdq->ind_table)
2027                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2028         if (fdq && fdq->wq)
2029                 claim_zero(ibv_destroy_wq(fdq->wq));
2030         if (fdq && fdq->cq)
2031                 claim_zero(ibv_destroy_cq(fdq->cq));
2032         if (fdq)
2033                 rte_free(fdq);
2034         priv->flow_drop_queue = NULL;
2035         return -1;
2036 }
2037
2038 /**
2039  * Delete drop queue.
2040  *
2041  * @param priv
2042  *   Pointer to private structure.
2043  */
2044 void
2045 priv_flow_delete_drop_queue(struct priv *priv)
2046 {
2047         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2048
2049         if (!fdq)
2050                 return;
2051         if (fdq->qp)
2052                 claim_zero(ibv_destroy_qp(fdq->qp));
2053         if (fdq->ind_table)
2054                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2055         if (fdq->wq)
2056                 claim_zero(ibv_destroy_wq(fdq->wq));
2057         if (fdq->cq)
2058                 claim_zero(ibv_destroy_cq(fdq->cq));
2059         rte_free(fdq);
2060         priv->flow_drop_queue = NULL;
2061 }
2062
2063 /**
2064  * Remove all flows from the NIC, keeping them in the flow list.
2065  *
2066  * @param priv
2067  *   Pointer to private structure.
2068  * @param list
2069  *   Pointer to a TAILQ flow list.
2070  */
2071 void
2072 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2073 {
2074         struct rte_flow *flow;
2075
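             /* Flows are removed in the reverse order of their creation. */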
2076         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2077                 unsigned int i;
2078
2079                 if (flow->drop) {
2080                         if (!flow->drxq.ibv_flow)
2081                                 continue;
2082                         claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
2083                         flow->drxq.ibv_flow = NULL;
2084                         /* Next flow. */
2085                         continue;
2086                 }
2087                 if (flow->mark) {
2088                         struct mlx5_ind_table_ibv *ind_tbl = NULL;
2089
2090                         for (i = 0; i != hash_rxq_init_n; ++i) {
2091                                 if (!flow->frxq[i].hrxq)
2092                                         continue;
2093                                 ind_tbl = flow->frxq[i].hrxq->ind_table;
2094                         }
2095                         assert(ind_tbl);
2096                         for (i = 0; i != ind_tbl->queues_n; ++i)
2097                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2098                 }
2099                 for (i = 0; i != hash_rxq_init_n; ++i) {
2100                         if (!flow->frxq[i].ibv_flow)
2101                                 continue;
2102                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2103                         flow->frxq[i].ibv_flow = NULL;
2104                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2105                         flow->frxq[i].hrxq = NULL;
2106                 }
2107                 DEBUG("Flow %p removed", (void *)flow);
2108         }
2109 }
2110
2111 /**
2112  * Apply all flows in the list to the NIC.
2113  *
2114  * @param priv
2115  *   Pointer to private structure.
2116  * @param list
2117  *   Pointer to a TAILQ flow list.
2118  *
2119  * @return
2120  *   0 on success, an errno value otherwise and rte_errno is set.
2121  */
2122 int
2123 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2124 {
2125         struct rte_flow *flow;
2126
2127         TAILQ_FOREACH(flow, list, next) {
2128                 unsigned int i;
2129
2130                 if (flow->drop) {
2131                         flow->drxq.ibv_flow =
2132                                 ibv_create_flow(priv->flow_drop_queue->qp,
2133                                                 flow->drxq.ibv_attr);
2134                         if (!flow->drxq.ibv_flow) {
2135                                 DEBUG("Flow %p cannot be applied",
2136                                       (void *)flow);
2137                                 rte_errno = EINVAL;
2138                                 return rte_errno;
2139                         }
2140                         DEBUG("Flow %p applied", (void *)flow);
2141                         /* Next flow. */
2142                         continue;
2143                 }
2144                 for (i = 0; i != hash_rxq_init_n; ++i) {
2145                         if (!flow->frxq[i].ibv_attr)
2146                                 continue;
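                             /*
                              * Get an existing hash Rx queue matching this
                              * configuration, or create a new one.
                              */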
2147                         flow->frxq[i].hrxq =
2148                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2149                                                    flow->rss_conf.rss_key_len,
2150                                                    hash_rxq_init[i].hash_fields,
2151                                                    (*flow->queues),
2152                                                    flow->queues_n);
2153                         if (flow->frxq[i].hrxq)
2154                                 goto flow_create;
2155                         flow->frxq[i].hrxq =
2156                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2157                                                    flow->rss_conf.rss_key_len,
2158                                                    hash_rxq_init[i].hash_fields,
2159                                                    (*flow->queues),
2160                                                    flow->queues_n);
2161                         if (!flow->frxq[i].hrxq) {
2162                                 DEBUG("Flow %p cannot be applied",
2163                                       (void *)flow);
2164                                 rte_errno = EINVAL;
2165                                 return rte_errno;
2166                         }
2167 flow_create:
2168                         flow->frxq[i].ibv_flow =
2169                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2170                                                 flow->frxq[i].ibv_attr);
2171                         if (!flow->frxq[i].ibv_flow) {
2172                                 DEBUG("Flow %p cannot be applied",
2173                                       (void *)flow);
2174                                 rte_errno = EINVAL;
2175                                 return rte_errno;
2176                         }
2177                         DEBUG("Flow %p applied", (void *)flow);
2178                 }
2179                 if (!flow->mark)
2180                         continue;
2181                 for (i = 0; i != flow->queues_n; ++i)
2182                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2183         }
2184         return 0;
2185 }
2186
2187 /**
2188  * Verify the flow list is empty.
2189  *
2190  * @param priv
2191  *   Pointer to private structure.
2192  *
2193  * @return The number of flows not released.
2194  */
2195 int
2196 priv_flow_verify(struct priv *priv)
2197 {
2198         struct rte_flow *flow;
2199         int ret = 0;
2200
2201         TAILQ_FOREACH(flow, &priv->flows, next) {
2202                 DEBUG("%p: flow %p still referenced", (void *)priv,
2203                       (void *)flow);
2204                 ++ret;
2205         }
2206         return ret;
2207 }
2208
2209 /**
2210  * Enable a control flow configured from the control plane.
2211  *
2212  * @param dev
2213  *   Pointer to Ethernet device.
2214  * @param eth_spec
2215  *   An Ethernet flow spec to apply.
2216  * @param eth_mask
2217  *   An Ethernet flow mask to apply.
2218  * @param vlan_spec
2219  *   A VLAN flow spec to apply.
2220  * @param vlan_mask
2221  *   A VLAN flow mask to apply.
2222  *
2223  * @return
2224  *   0 on success, an errno value otherwise.
2225  */
2226 int
2227 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2228                     struct rte_flow_item_eth *eth_spec,
2229                     struct rte_flow_item_eth *eth_mask,
2230                     struct rte_flow_item_vlan *vlan_spec,
2231                     struct rte_flow_item_vlan *vlan_mask)
2232 {
2233         struct priv *priv = dev->data->dev_private;
2234         const struct rte_flow_attr attr = {
2235                 .ingress = 1,
2236                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2237         };
2238         struct rte_flow_item items[] = {
2239                 {
2240                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2241                         .spec = eth_spec,
2242                         .last = NULL,
2243                         .mask = eth_mask,
2244                 },
2245                 {
2246                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2247                                 RTE_FLOW_ITEM_TYPE_END,
2248                         .spec = vlan_spec,
2249                         .last = NULL,
2250                         .mask = vlan_mask,
2251                 },
2252                 {
2253                         .type = RTE_FLOW_ITEM_TYPE_END,
2254                 },
2255         };
2256         struct rte_flow_action actions[] = {
2257                 {
2258                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2259                 },
2260                 {
2261                         .type = RTE_FLOW_ACTION_TYPE_END,
2262                 },
2263         };
2264         struct rte_flow *flow;
2265         struct rte_flow_error error;
2266         unsigned int i;
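             /*
              * struct rte_flow_action_rss ends in a flexible queue[] array;
              * overlay it on a local layout sized for
              * RTE_MAX_QUEUES_PER_PORT entries.
              */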
2267         union {
2268                 struct rte_flow_action_rss rss;
2269                 struct {
2270                         const struct rte_eth_rss_conf *rss_conf;
2271                         uint16_t num;
2272                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2273                 } local;
2274         } action_rss;
2275
2276         if (!priv->reta_idx_n)
2277                 return EINVAL;
2278         for (i = 0; i != priv->reta_idx_n; ++i)
2279                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2280         action_rss.local.rss_conf = &priv->rss_conf;
2281         action_rss.local.num = priv->reta_idx_n;
2282         actions[0].conf = (const void *)&action_rss.rss;
2283         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2284                                 &error);
2285         if (!flow)
2286                 return rte_errno;
2287         return 0;
2288 }
2289
2290 /**
2291  * Enable a control flow configured from the control plane.
2292  *
2293  * @param dev
2294  *   Pointer to Ethernet device.
2295  * @param eth_spec
2296  *   An Ethernet flow spec to apply.
2297  * @param eth_mask
2298  *   An Ethernet flow mask to apply.
2299  *
2300  * @return
2301  *   0 on success, an errno value otherwise.
2302  */
2303 int
2304 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2305                struct rte_flow_item_eth *eth_spec,
2306                struct rte_flow_item_eth *eth_mask)
2307 {
2308         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2309 }
2310
2311 /**
2312  * Destroy a flow.
2313  *
2314  * @see rte_flow_destroy()
2315  * @see rte_flow_ops
2316  */
2317 int
2318 mlx5_flow_destroy(struct rte_eth_dev *dev,
2319                   struct rte_flow *flow,
2320                   struct rte_flow_error *error)
2321 {
2322         struct priv *priv = dev->data->dev_private;
2323
2324         (void)error;
2325         priv_lock(priv);
2326         priv_flow_destroy(priv, &priv->flows, flow);
2327         priv_unlock(priv);
2328         return 0;
2329 }
2330
2331 /**
2332  * Destroy all flows.
2333  *
2334  * @see rte_flow_flush()
2335  * @see rte_flow_ops
2336  */
2337 int
2338 mlx5_flow_flush(struct rte_eth_dev *dev,
2339                 struct rte_flow_error *error)
2340 {
2341         struct priv *priv = dev->data->dev_private;
2342
2343         (void)error;
2344         priv_lock(priv);
2345         priv_flow_flush(priv, &priv->flows);
2346         priv_unlock(priv);
2347         return 0;
2348 }
2349
2350 /**
2351  * Isolated mode.
2352  *
2353  * @see rte_flow_isolate()
2354  * @see rte_flow_ops
2355  */
2356 int
2357 mlx5_flow_isolate(struct rte_eth_dev *dev,
2358                   int enable,
2359                   struct rte_flow_error *error)
2360 {
2361         struct priv *priv = dev->data->dev_private;
2362
2363         priv_lock(priv);
2364         if (dev->data->dev_started) {
2365                 rte_flow_error_set(error, EBUSY,
2366                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2367                                    NULL,
2368                                    "port must be stopped first");
2369                 priv_unlock(priv);
2370                 return -rte_errno;
2371         }
2372         priv->isolated = !!enable;
2373         priv_unlock(priv);
2374         return 0;
2375 }