1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_glue.h"
33 #include "mlx5_prm.h"
34 #include "mlx5_rxtx.h"
35
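/*
 * Map the tunnel bit of the parsed item flags to the Verbs "inner" modifier:
 * once a tunnel layer has been matched, subsequent specs describe the inner
 * packet and must carry IBV_FLOW_SPEC_INNER.
 */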
36 #define VERBS_SPEC_INNER(item_flags) \
37         (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
38
39 /**
40  * Create a flow counter with the Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object; it contains the counter ID and,
46  *   if counters are supported, receives the handle of the created
47  *   Verbs flow counter in its cs field.
48  *
49  * @return
50  *   0 on success, otherwise a negative errno value is returned
51  *   and rte_errno is set.
52  */
53 static int
54 flow_verbs_counter_create(struct rte_eth_dev *dev,
55                           struct mlx5_flow_counter *counter)
56 {
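        /*
         * Two Verbs counter APIs are covered below: the counter_set interface
         * (HAVE_IBV_DEVICE_COUNTERS_SET_V42) creates a counter set bound to
         * the requested counter ID, while the newer counters interface
         * (HAVE_IBV_DEVICE_COUNTERS_SET_V45) creates a counters object and
         * attaches a packet counter at index 0 and a byte counter at index 1.
         * When neither is available, counters are reported as unsupported.
         */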
57 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
58         struct mlx5_priv *priv = dev->data->dev_private;
59         struct ibv_context *ctx = priv->sh->ctx;
60         struct ibv_counter_set_init_attr init = {
61                          .counter_set_id = counter->id};
62
63         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
64         if (!counter->cs) {
65                 rte_errno = ENOTSUP;
66                 return -ENOTSUP;
67         }
68         return 0;
69 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
70         struct mlx5_priv *priv = dev->data->dev_private;
71         struct ibv_context *ctx = priv->sh->ctx;
72         struct ibv_counters_init_attr init = {0};
73         struct ibv_counter_attach_attr attach;
74         int ret;
75
76         memset(&attach, 0, sizeof(attach));
77         counter->cs = mlx5_glue->create_counters(ctx, &init);
78         if (!counter->cs) {
79                 rte_errno = ENOTSUP;
80                 return -ENOTSUP;
81         }
82         attach.counter_desc = IBV_COUNTER_PACKETS;
83         attach.index = 0;
84         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
85         if (!ret) {
86                 attach.counter_desc = IBV_COUNTER_BYTES;
87                 attach.index = 1;
88                 ret = mlx5_glue->attach_counters
89                                         (counter->cs, &attach, NULL);
90         }
91         if (ret) {
92                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
93                 counter->cs = NULL;
94                 rte_errno = ret;
95                 return -ret;
96         }
97         return 0;
98 #else
99         (void)dev;
100         (void)counter;
101         rte_errno = ENOTSUP;
102         return -ENOTSUP;
103 #endif
104 }
105
106 /**
107  * Get a flow counter.
108  *
109  * @param[in] dev
110  *   Pointer to the Ethernet device structure.
111  * @param[in] shared
112  *   Indicate if this counter is shared with other flows.
113  * @param[in] id
114  *   Counter identifier.
115  *
116  * @return
117  *   A pointer to the counter, NULL otherwise and rte_errno is set.
118  */
119 static struct mlx5_flow_counter *
120 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
121 {
122         struct mlx5_priv *priv = dev->data->dev_private;
123         struct mlx5_flow_counter *cnt;
124         int ret;
125
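        /*
         * Reuse an already allocated counter if a shared one with the same
         * ID exists.
         */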
126         if (shared) {
127                 TAILQ_FOREACH(cnt, &priv->sh->cmng.flow_counters, next) {
128                         if (cnt->shared && cnt->id == id) {
129                                 cnt->ref_cnt++;
130                                 return cnt;
131                         }
132                 }
133         }
134         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
135         if (!cnt) {
136                 rte_errno = ENOMEM;
137                 return NULL;
138         }
139         cnt->id = id;
140         cnt->shared = shared;
141         cnt->ref_cnt = 1;
142         cnt->hits = 0;
143         cnt->bytes = 0;
144         /* Create counter with Verbs. */
145         ret = flow_verbs_counter_create(dev, cnt);
146         if (!ret) {
147                 TAILQ_INSERT_HEAD(&priv->sh->cmng.flow_counters, cnt, next);
148                 return cnt;
149         }
150         /* Some error occurred in Verbs library. */
151         rte_free(cnt);
152         rte_errno = -ret;
153         return NULL;
154 }
155
156 /**
157  * Release a flow counter.
158  *
159  * @param[in] dev
160  *   Pointer to the Ethernet device structure.
161  * @param[in] counter
162  *   Pointer to the counter handle.
163  */
164 static void
165 flow_verbs_counter_release(struct rte_eth_dev *dev,
166                            struct mlx5_flow_counter *counter)
167 {
168         struct mlx5_priv *priv = dev->data->dev_private;
169
170         if (--counter->ref_cnt == 0) {
171 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
172                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
173 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
174                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
175 #endif
176                 TAILQ_REMOVE(&priv->sh->cmng.flow_counters, counter, next);
177                 rte_free(counter);
178         }
179 }
180
181 /**
182  * Query a flow counter via Verbs library call.
183  *
184  * @see rte_flow_query()
185  * @see rte_flow_ops
186  */
187 static int
188 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
189                          struct rte_flow *flow, void *data,
190                          struct rte_flow_error *error)
191 {
192 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
193         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
194         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
195                 struct rte_flow_query_count *qc = data;
196                 uint64_t counters[2] = {0, 0};
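                /*
                 * counters[0] accumulates the packet count and counters[1]
                 * the byte count. The values reported to the application are
                 * deltas against the baseline cached in flow->counter, which
                 * is refreshed when a reset is requested.
                 */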
197 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
198                 struct ibv_query_counter_set_attr query_cs_attr = {
199                         .cs = flow->counter->cs,
200                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
201                 };
202                 struct ibv_counter_set_data query_out = {
203                         .out = counters,
204                         .outlen = 2 * sizeof(uint64_t),
205                 };
206                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
207                                                        &query_out);
208 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
209                 int err = mlx5_glue->query_counters
210                                (flow->counter->cs, counters,
211                                 RTE_DIM(counters),
212                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
213 #endif
214                 if (err)
215                         return rte_flow_error_set
216                                 (error, err,
217                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
218                                  NULL,
219                                  "cannot read counter");
220                 qc->hits_set = 1;
221                 qc->bytes_set = 1;
222                 qc->hits = counters[0] - flow->counter->hits;
223                 qc->bytes = counters[1] - flow->counter->bytes;
224                 if (qc->reset) {
225                         flow->counter->hits = counters[0];
226                         flow->counter->bytes = counters[1];
227                 }
228                 return 0;
229         }
230         return rte_flow_error_set(error, EINVAL,
231                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
232                                   NULL,
233                                   "flow does not have counter");
234 #else
235         (void)flow;
236         (void)data;
237         return rte_flow_error_set(error, ENOTSUP,
238                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
239                                   NULL,
240                                   "counters are not available");
241 #endif
242 }
243
244 /**
245  * Add a Verbs specification into @p verbs.
246  *
247  * @param[out] verbs
248  *   Pointer to verbs structure.
249  * @param[in] src
250  *   Pointer to the specification to copy.
251  * @param[in] size
252  *   Size in bytes of the specification to copy.
253  */
254 static void
255 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
256 {
257         void *dst;
258
259         if (!verbs)
260                 return;
261         assert(verbs->specs);
262         dst = (void *)(verbs->specs + verbs->size);
263         memcpy(dst, src, size);
264         ++verbs->attr->num_of_specs;
265         verbs->size += size;
266 }
267
268 /**
269  * Convert the @p item into a Verbs specification. This function assumes that
270  * the input is valid and that there is space to insert the requested item
271  * into the flow.
272  *
273  * @param[in, out] dev_flow
274  *   Pointer to dev_flow structure.
275  * @param[in] item
276  *   Item specification.
277  * @param[in] item_flags
278  *   Parsed item flags.
279  */
280 static void
281 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
282                               const struct rte_flow_item *item,
283                               uint64_t item_flags)
284 {
285         const struct rte_flow_item_eth *spec = item->spec;
286         const struct rte_flow_item_eth *mask = item->mask;
287         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
288         struct ibv_flow_spec_eth eth = {
289                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
290                 .size = size,
291         };
292
293         if (!mask)
294                 mask = &rte_flow_item_eth_mask;
295         if (spec) {
296                 unsigned int i;
297
298                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
299                         RTE_ETHER_ADDR_LEN);
300                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
301                         RTE_ETHER_ADDR_LEN);
302                 eth.val.ether_type = spec->type;
303                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
304                         RTE_ETHER_ADDR_LEN);
305                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
306                         RTE_ETHER_ADDR_LEN);
307                 eth.mask.ether_type = mask->type;
308                 /* Remove unwanted bits from values. */
309                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
310                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
311                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
312                 }
313                 eth.val.ether_type &= eth.mask.ether_type;
314         }
315         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
316 }
317
318 /**
319  * Update the VLAN tag in the Verbs Ethernet specification.
320  * This function assumes that the input is valid and there is space to add
321  * the requested item.
322  *
323  * @param[in, out] attr
324  *   Pointer to Verbs attributes structure.
325  * @param[in] eth
326  *   Verbs structure containing the VLAN information to copy.
327  */
328 static void
329 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
330                             struct ibv_flow_spec_eth *eth)
331 {
332         unsigned int i;
333         const enum ibv_flow_spec_type search = eth->type;
334         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
335                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
336
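        /*
         * Walk the specs stored after the attribute header and merge the VLAN
         * TCI and EtherType into the first Ethernet spec of the matching type.
         */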
337         for (i = 0; i != attr->num_of_specs; ++i) {
338                 if (hdr->type == search) {
339                         struct ibv_flow_spec_eth *e =
340                                 (struct ibv_flow_spec_eth *)hdr;
341
342                         e->val.vlan_tag = eth->val.vlan_tag;
343                         e->mask.vlan_tag = eth->mask.vlan_tag;
344                         e->val.ether_type = eth->val.ether_type;
345                         e->mask.ether_type = eth->mask.ether_type;
346                         break;
347                 }
348                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
349         }
350 }
351
352 /**
353  * Convert the @p item into a Verbs specification. This function assumes that
354  * the input is valid and that there is space to insert the requested item
355  * into the flow.
356  *
357  * @param[in, out] dev_flow
358  *   Pointer to dev_flow structure.
359  * @param[in] item
360  *   Item specification.
361  * @param[in] item_flags
362  *   Parsed item flags.
363  */
364 static void
365 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
366                                const struct rte_flow_item *item,
367                                uint64_t item_flags)
368 {
369         const struct rte_flow_item_vlan *spec = item->spec;
370         const struct rte_flow_item_vlan *mask = item->mask;
371         unsigned int size = sizeof(struct ibv_flow_spec_eth);
372         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
373         struct ibv_flow_spec_eth eth = {
374                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
375                 .size = size,
376         };
377         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
378                                       MLX5_FLOW_LAYER_OUTER_L2;
379
380         if (!mask)
381                 mask = &rte_flow_item_vlan_mask;
382         if (spec) {
383                 eth.val.vlan_tag = spec->tci;
384                 eth.mask.vlan_tag = mask->tci;
385                 eth.val.vlan_tag &= eth.mask.vlan_tag;
386                 eth.val.ether_type = spec->inner_type;
387                 eth.mask.ether_type = mask->inner_type;
388                 eth.val.ether_type &= eth.mask.ether_type;
389         }
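        /*
         * If no Ethernet spec has been emitted for this layer yet, the VLAN
         * item provides it; otherwise the already emitted Ethernet spec is
         * updated in place with the VLAN information.
         */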
390         if (!(item_flags & l2m))
391                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
392         else
393                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
394 }
395
396 /**
397  * Convert the @p item into a Verbs specification. This function assumes that
398  * the input is valid and that there is space to insert the requested item
399  * into the flow.
400  *
401  * @param[in, out] dev_flow
402  *   Pointer to dev_flow structure.
403  * @param[in] item
404  *   Item specification.
405  * @param[in] item_flags
406  *   Parsed item flags.
407  */
408 static void
409 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
410                                const struct rte_flow_item *item,
411                                uint64_t item_flags)
412 {
413         const struct rte_flow_item_ipv4 *spec = item->spec;
414         const struct rte_flow_item_ipv4 *mask = item->mask;
415         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
416         struct ibv_flow_spec_ipv4_ext ipv4 = {
417                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
418                 .size = size,
419         };
420
421         if (!mask)
422                 mask = &rte_flow_item_ipv4_mask;
423         if (spec) {
424                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
425                         .src_ip = spec->hdr.src_addr,
426                         .dst_ip = spec->hdr.dst_addr,
427                         .proto = spec->hdr.next_proto_id,
428                         .tos = spec->hdr.type_of_service,
429                 };
430                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
431                         .src_ip = mask->hdr.src_addr,
432                         .dst_ip = mask->hdr.dst_addr,
433                         .proto = mask->hdr.next_proto_id,
434                         .tos = mask->hdr.type_of_service,
435                 };
436                 /* Remove unwanted bits from values. */
437                 ipv4.val.src_ip &= ipv4.mask.src_ip;
438                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
439                 ipv4.val.proto &= ipv4.mask.proto;
440                 ipv4.val.tos &= ipv4.mask.tos;
441         }
442         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
443 }
444
445 /**
446  * Convert the @p item into a Verbs specification. This function assumes that
447  * the input is valid and that there is space to insert the requested item
448  * into the flow.
449  *
450  * @param[in, out] dev_flow
451  *   Pointer to dev_flow structure.
452  * @param[in] item
453  *   Item specification.
454  * @param[in] item_flags
455  *   Parsed item flags.
456  */
457 static void
458 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
459                                const struct rte_flow_item *item,
460                                uint64_t item_flags)
461 {
462         const struct rte_flow_item_ipv6 *spec = item->spec;
463         const struct rte_flow_item_ipv6 *mask = item->mask;
464         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
465         struct ibv_flow_spec_ipv6 ipv6 = {
466                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
467                 .size = size,
468         };
469
470         if (!mask)
471                 mask = &rte_flow_item_ipv6_mask;
472         if (spec) {
473                 unsigned int i;
474                 uint32_t vtc_flow_val;
475                 uint32_t vtc_flow_mask;
476
477                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
478                        RTE_DIM(ipv6.val.src_ip));
479                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
480                        RTE_DIM(ipv6.val.dst_ip));
481                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
482                        RTE_DIM(ipv6.mask.src_ip));
483                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
484                        RTE_DIM(ipv6.mask.dst_ip));
485                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
486                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
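                /*
                 * The IPv6 vtc_flow word packs version, traffic class and flow
                 * label; traffic class and flow label are extracted with the
                 * RTE_IPV6_HDR_* masks and shifts before filling the spec.
                 */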
487                 ipv6.val.flow_label =
488                         rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
489                                          RTE_IPV6_HDR_FL_SHIFT);
490                 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
491                                          RTE_IPV6_HDR_TC_SHIFT;
492                 ipv6.val.next_hdr = spec->hdr.proto;
493                 ipv6.val.hop_limit = spec->hdr.hop_limits;
494                 ipv6.mask.flow_label =
495                         rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
496                                          RTE_IPV6_HDR_FL_SHIFT);
497                 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
498                                           RTE_IPV6_HDR_TC_SHIFT;
499                 ipv6.mask.next_hdr = mask->hdr.proto;
500                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
501                 /* Remove unwanted bits from values. */
502                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
503                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
504                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
505                 }
506                 ipv6.val.flow_label &= ipv6.mask.flow_label;
507                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
508                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
509                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
510         }
511         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
512 }
513
514 /**
515  * Convert the @p item into a Verbs specification. This function assumes that
516  * the input is valid and that there is space to insert the requested item
517  * into the flow.
518  *
519  * @param[in, out] dev_flow
520  *   Pointer to dev_flow structure.
521  * @param[in] item
522  *   Item specification.
523  * @param[in] item_flags
524  *   Parsed item flags.
525  */
526 static void
527 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
528                               const struct rte_flow_item *item,
529                               uint64_t item_flags __rte_unused)
530 {
531         const struct rte_flow_item_tcp *spec = item->spec;
532         const struct rte_flow_item_tcp *mask = item->mask;
533         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
534         struct ibv_flow_spec_tcp_udp tcp = {
535                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
536                 .size = size,
537         };
538
539         if (!mask)
540                 mask = &rte_flow_item_tcp_mask;
541         if (spec) {
542                 tcp.val.dst_port = spec->hdr.dst_port;
543                 tcp.val.src_port = spec->hdr.src_port;
544                 tcp.mask.dst_port = mask->hdr.dst_port;
545                 tcp.mask.src_port = mask->hdr.src_port;
546                 /* Remove unwanted bits from values. */
547                 tcp.val.src_port &= tcp.mask.src_port;
548                 tcp.val.dst_port &= tcp.mask.dst_port;
549         }
550         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
551 }
552
553 /**
554  * Convert the @p item into a Verbs specification. This function assumes that
555  * the input is valid and that there is space to insert the requested item
556  * into the flow.
557  *
558  * @param[in, out] dev_flow
559  *   Pointer to dev_flow structure.
560  * @param[in] item
561  *   Item specification.
562  * @param[in] item_flags
563  *   Parsed item flags.
564  */
565 static void
566 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
567                               const struct rte_flow_item *item,
568                               uint64_t item_flags __rte_unused)
569 {
570         const struct rte_flow_item_udp *spec = item->spec;
571         const struct rte_flow_item_udp *mask = item->mask;
572         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
573         struct ibv_flow_spec_tcp_udp udp = {
574                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
575                 .size = size,
576         };
577
578         if (!mask)
579                 mask = &rte_flow_item_udp_mask;
580         if (spec) {
581                 udp.val.dst_port = spec->hdr.dst_port;
582                 udp.val.src_port = spec->hdr.src_port;
583                 udp.mask.dst_port = mask->hdr.dst_port;
584                 udp.mask.src_port = mask->hdr.src_port;
585                 /* Remove unwanted bits from values. */
586                 udp.val.src_port &= udp.mask.src_port;
587                 udp.val.dst_port &= udp.mask.dst_port;
588         }
589         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
590 }
591
592 /**
593  * Convert the @p item into a Verbs specification. This function assumes that
594  * the input is valid and that there is space to insert the requested item
595  * into the flow.
596  *
597  * @param[in, out] dev_flow
598  *   Pointer to dev_flow structure.
599  * @param[in] item
600  *   Item specification.
601  * @param[in] item_flags
602  *   Parsed item flags.
603  */
604 static void
605 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
606                                 const struct rte_flow_item *item,
607                                 uint64_t item_flags __rte_unused)
608 {
609         const struct rte_flow_item_vxlan *spec = item->spec;
610         const struct rte_flow_item_vxlan *mask = item->mask;
611         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
612         struct ibv_flow_spec_tunnel vxlan = {
613                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
614                 .size = size,
615         };
616         union vni {
617                 uint32_t vlan_id;
618                 uint8_t vni[4];
619         } id = { .vlan_id = 0, };
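        /*
         * The 24-bit VNI from the item is copied into bytes 1..3 of the
         * 32-bit union so that the full value can be used directly as the
         * Verbs tunnel_id.
         */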
620
621         if (!mask)
622                 mask = &rte_flow_item_vxlan_mask;
623         if (spec) {
624                 memcpy(&id.vni[1], spec->vni, 3);
625                 vxlan.val.tunnel_id = id.vlan_id;
626                 memcpy(&id.vni[1], mask->vni, 3);
627                 vxlan.mask.tunnel_id = id.vlan_id;
628                 /* Remove unwanted bits from values. */
629                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
630         }
631         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
632 }
633
634 /**
635  * Convert the @p item into a Verbs specification. This function assumes that
636  * the input is valid and that there is space to insert the requested item
637  * into the flow.
638  *
639  * @param[in, out] dev_flow
640  *   Pointer to dev_flow structure.
641  * @param[in] item
642  *   Item specification.
643  * @param[in] item_flags
644  *   Parsed item flags.
645  */
646 static void
647 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
648                                     const struct rte_flow_item *item,
649                                     uint64_t item_flags __rte_unused)
650 {
651         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
652         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
653         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
654         struct ibv_flow_spec_tunnel vxlan_gpe = {
655                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
656                 .size = size,
657         };
658         union vni {
659                 uint32_t vlan_id;
660                 uint8_t vni[4];
661         } id = { .vlan_id = 0, };
662
663         if (!mask)
664                 mask = &rte_flow_item_vxlan_gpe_mask;
665         if (spec) {
666                 memcpy(&id.vni[1], spec->vni, 3);
667                 vxlan_gpe.val.tunnel_id = id.vlan_id;
668                 memcpy(&id.vni[1], mask->vni, 3);
669                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
670                 /* Remove unwanted bits from values. */
671                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
672         }
673         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
674 }
675
676 /**
677  * Update the protocol in Verbs IPv4/IPv6 spec.
678  *
679  * @param[in, out] attr
680  *   Pointer to Verbs attributes structure.
681  * @param[in] search
682  *   Specification type to search in order to update the IP protocol.
683  * @param[in] protocol
684  *   Protocol value to set if none is present in the specification.
685  */
686 static void
687 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
688                                        enum ibv_flow_spec_type search,
689                                        uint8_t protocol)
690 {
691         unsigned int i;
692         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
693                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
694
695         if (!attr)
696                 return;
697         for (i = 0; i != attr->num_of_specs; ++i) {
698                 if (hdr->type == search) {
699                         union {
700                                 struct ibv_flow_spec_ipv4_ext *ipv4;
701                                 struct ibv_flow_spec_ipv6 *ipv6;
702                         } ip;
703
704                         switch (search) {
705                         case IBV_FLOW_SPEC_IPV4_EXT:
706                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
707                                 if (!ip.ipv4->val.proto) {
708                                         ip.ipv4->val.proto = protocol;
709                                         ip.ipv4->mask.proto = 0xff;
710                                 }
711                                 break;
712                         case IBV_FLOW_SPEC_IPV6:
713                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
714                                 if (!ip.ipv6->val.next_hdr) {
715                                         ip.ipv6->val.next_hdr = protocol;
716                                         ip.ipv6->mask.next_hdr = 0xff;
717                                 }
718                                 break;
719                         default:
720                                 break;
721                         }
722                         break;
723                 }
724                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
725         }
726 }
727
728 /**
729  * Convert the @p item into a Verbs specification. This function assumes that
730  * the input is valid and that there is space to insert the requested item
731  * into the flow.
732  *
733  * @param[in, out] dev_flow
734  *   Pointer to dev_flow structure.
735  * @param[in] item
736  *   Item specification.
737  * @param[in] item_flags
738  *   Parsed item flags.
739  */
740 static void
741 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
742                               const struct rte_flow_item *item __rte_unused,
743                               uint64_t item_flags)
744 {
745         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
746 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
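        /*
         * rdma-core built without MPLS support does not expose a dedicated
         * GRE spec; a generic tunnel spec is emitted instead and only the IP
         * protocol of the outer L3 spec is enforced below.
         */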
747         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
748         struct ibv_flow_spec_tunnel tunnel = {
749                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
750                 .size = size,
751         };
752 #else
753         const struct rte_flow_item_gre *spec = item->spec;
754         const struct rte_flow_item_gre *mask = item->mask;
755         unsigned int size = sizeof(struct ibv_flow_spec_gre);
756         struct ibv_flow_spec_gre tunnel = {
757                 .type = IBV_FLOW_SPEC_GRE,
758                 .size = size,
759         };
760
761         if (!mask)
762                 mask = &rte_flow_item_gre_mask;
763         if (spec) {
764                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
765                 tunnel.val.protocol = spec->protocol;
766                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
767                 tunnel.mask.protocol = mask->protocol;
768                 /* Remove unwanted bits from values. */
769                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
770                 tunnel.val.protocol &= tunnel.mask.protocol;
771                 tunnel.val.key &= tunnel.mask.key;
772         }
773 #endif
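        /*
         * If the IPv4/IPv6 item left the protocol/next header unset, force it
         * to IPPROTO_GRE in the L3 spec that was already emitted.
         */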
774         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
775                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
776                                                        IBV_FLOW_SPEC_IPV4_EXT,
777                                                        IPPROTO_GRE);
778         else
779                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
780                                                        IBV_FLOW_SPEC_IPV6,
781                                                        IPPROTO_GRE);
782         flow_verbs_spec_add(verbs, &tunnel, size);
783 }
784
785 /**
786  * Convert the @p item into a Verbs specification. This function assumes that
787  * the input is valid and that there is space to insert the requested item
788  * into the flow.
789  *
790  * @param[in, out] dev_flow
791  *   Pointer to dev_flow structure.
792  * @param[in] item
793  *   Item specification.
794  * @param[in] item_flags
795  *   Parsed item flags.
796  */
797 static void
798 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
799                                const struct rte_flow_item *item __rte_unused,
800                                uint64_t item_flags __rte_unused)
801 {
802 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
803         const struct rte_flow_item_mpls *spec = item->spec;
804         const struct rte_flow_item_mpls *mask = item->mask;
805         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
806         struct ibv_flow_spec_mpls mpls = {
807                 .type = IBV_FLOW_SPEC_MPLS,
808                 .size = size,
809         };
810
811         if (!mask)
812                 mask = &rte_flow_item_mpls_mask;
813         if (spec) {
814                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
815                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
816                 /* Remove unwanted bits from values.  */
817                 mpls.val.label &= mpls.mask.label;
818         }
819         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
820 #endif
821 }
822
823 /**
824  * Convert the @p action into a Verbs specification. This function assumes that
825  * the input is valid and that there is space to insert the requested action
826  * into the flow.
827  *
828  * @param[in] dev_flow
829  *   Pointer to mlx5_flow.
830  * @param[in] action
831  *   Action configuration.
832  */
833 static void
834 flow_verbs_translate_action_drop
835         (struct mlx5_flow *dev_flow,
836          const struct rte_flow_action *action __rte_unused)
837 {
838         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
839         struct ibv_flow_spec_action_drop drop = {
840                         .type = IBV_FLOW_SPEC_ACTION_DROP,
841                         .size = size,
842         };
843
844         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
845 }
846
847 /**
848  * Convert the @p action into a Verbs specification. This function assumes that
849  * the input is valid and that there is space to insert the requested action
850  * into the flow.
851  *
852  * @param[in] dev_flow
853  *   Pointer to mlx5_flow.
854  * @param[in] action
855  *   Action configuration.
856  */
857 static void
858 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
859                                   const struct rte_flow_action *action)
860 {
861         const struct rte_flow_action_queue *queue = action->conf;
862         struct rte_flow *flow = dev_flow->flow;
863
864         if (flow->queue)
865                 (*flow->queue)[0] = queue->index;
866         flow->rss.queue_num = 1;
867 }
868
869 /**
870  * Convert the @p action into a Verbs specification. This function assumes that
871  * the input is valid and that there is space to insert the requested action
872  * into the flow.
873  *
874  * @param[in] dev_flow
875  *   Pointer to mlx5_flow.
876  * @param[in] action
877  *   Action configuration.
880  */
881 static void
882 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
883                                 const struct rte_flow_action *action)
884 {
885         const struct rte_flow_action_rss *rss = action->conf;
886         const uint8_t *rss_key;
887         struct rte_flow *flow = dev_flow->flow;
888
889         if (flow->queue)
890                 memcpy((*flow->queue), rss->queue,
891                        rss->queue_num * sizeof(uint16_t));
892         flow->rss.queue_num = rss->queue_num;
893         /* NULL RSS key indicates default RSS key. */
894         rss_key = !rss->key ? rss_hash_default_key : rss->key;
895         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
896         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
897         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
898         flow->rss.level = rss->level;
899 }
900
901 /**
902  * Convert the @p action into a Verbs specification. This function assumes that
903  * the input is valid and that there is space to insert the requested action
904  * into the flow.
905  *
906  * @param[in] dev_flow
907  *   Pointer to mlx5_flow.
908  * @param[in] action
909  *   Action configuration.
910  */
911 static void
912 flow_verbs_translate_action_flag
913         (struct mlx5_flow *dev_flow,
914          const struct rte_flow_action *action __rte_unused)
915 {
916         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
917         struct ibv_flow_spec_action_tag tag = {
918                 .type = IBV_FLOW_SPEC_ACTION_TAG,
919                 .size = size,
920                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
921         };
922
923         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
924 }
925
926 /**
927  * Convert the @p action into a Verbs specification. This function assumes that
928  * the input is valid and that there is space to insert the requested action
929  * into the flow.
930  *
931  * @param[in] dev_flow
932  *   Pointer to mlx5_flow.
933  * @param[in] action
934  *   Action configuration.
935  */
936 static void
937 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
938                                  const struct rte_flow_action *action)
939 {
940         const struct rte_flow_action_mark *mark = action->conf;
941         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
942         struct ibv_flow_spec_action_tag tag = {
943                 .type = IBV_FLOW_SPEC_ACTION_TAG,
944                 .size = size,
945                 .tag_id = mlx5_flow_mark_set(mark->id),
946         };
947
948         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
949 }
950
951 /**
952  * Convert the @p action into a Verbs specification. This function assumes that
953  * the input is valid and that there is space to insert the requested action
954  * into the flow.
955  *
956  * @param[in] dev_flow
957  *   Pointer to mlx5_flow.
958  * @param[in] action
959  *   Action configuration.
960  * @param[in] dev
961  *   Pointer to the Ethernet device structure.
962  * @param[out] error
963  *   Pointer to error structure.
964  *
965  * @return
966  *   0 on success, otherwise a negative errno value is returned and rte_errno is set.
967  */
968 static int
969 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
970                                   const struct rte_flow_action *action,
971                                   struct rte_eth_dev *dev,
972                                   struct rte_flow_error *error)
973 {
974         const struct rte_flow_action_count *count = action->conf;
975         struct rte_flow *flow = dev_flow->flow;
976 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
977         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
978         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
979         struct ibv_flow_spec_counter_action counter = {
980                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
981                 .size = size,
982         };
983 #endif
984
985         if (!flow->counter) {
986                 flow->counter = flow_verbs_counter_new(dev, count->shared,
987                                                        count->id);
988                 if (!flow->counter)
989                         return rte_flow_error_set(error, rte_errno,
990                                                   RTE_FLOW_ERROR_TYPE_ACTION,
991                                                   action,
992                                                   "cannot get counter"
993                                                   " context.");
994         }
995 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
996         counter.counter_set_handle = flow->counter->cs->handle;
997         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
998 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
999         counter.counters = flow->counter->cs;
1000         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1001 #endif
1002         return 0;
1003 }
1004
1005 /**
1006  * Internal validation function. For validating both actions and items.
1007  *
1008  * @param[in] dev
1009  *   Pointer to the Ethernet device structure.
1010  * @param[in] attr
1011  *   Pointer to the flow attributes.
1012  * @param[in] items
1013  *   Pointer to the list of items.
1014  * @param[in] actions
1015  *   Pointer to the list of actions.
1016  * @param[out] error
1017  *   Pointer to the error structure.
1018  *
1019  * @return
1020  *   0 on success, a negative errno value otherwise and rte_errno is set.
1021  */
1022 static int
1023 flow_verbs_validate(struct rte_eth_dev *dev,
1024                     const struct rte_flow_attr *attr,
1025                     const struct rte_flow_item items[],
1026                     const struct rte_flow_action actions[],
1027                     struct rte_flow_error *error)
1028 {
1029         int ret;
1030         uint64_t action_flags = 0;
1031         uint64_t item_flags = 0;
1032         uint64_t last_item = 0;
1033         uint8_t next_protocol = 0xff;
1034
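        /*
         * next_protocol tracks the IP protocol (next header) announced by the
         * last L3 item so that the following UDP/TCP/GRE items can be checked
         * against it; it is reset to 0xff when the item does not constrain it.
         */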
1035         if (items == NULL)
1036                 return -1;
1037         ret = mlx5_flow_validate_attributes(dev, attr, error);
1038         if (ret < 0)
1039                 return ret;
1040         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1041                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1042                 int ret = 0;
1043
1044                 switch (items->type) {
1045                 case RTE_FLOW_ITEM_TYPE_VOID:
1046                         break;
1047                 case RTE_FLOW_ITEM_TYPE_ETH:
1048                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1049                                                           error);
1050                         if (ret < 0)
1051                                 return ret;
1052                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1053                                              MLX5_FLOW_LAYER_OUTER_L2;
1054                         break;
1055                 case RTE_FLOW_ITEM_TYPE_VLAN:
1056                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1057                                                            error);
1058                         if (ret < 0)
1059                                 return ret;
1060                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1061                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1062                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1063                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1064                         break;
1065                 case RTE_FLOW_ITEM_TYPE_IPV4:
1066                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1067                                                            NULL, error);
1068                         if (ret < 0)
1069                                 return ret;
1070                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1071                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1072                         if (items->mask != NULL &&
1073                             ((const struct rte_flow_item_ipv4 *)
1074                              items->mask)->hdr.next_proto_id) {
1075                                 next_protocol =
1076                                         ((const struct rte_flow_item_ipv4 *)
1077                                          (items->spec))->hdr.next_proto_id;
1078                                 next_protocol &=
1079                                         ((const struct rte_flow_item_ipv4 *)
1080                                          (items->mask))->hdr.next_proto_id;
1081                         } else {
1082                                 /* Reset for inner layer. */
1083                                 next_protocol = 0xff;
1084                         }
1085                         break;
1086                 case RTE_FLOW_ITEM_TYPE_IPV6:
1087                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1088                                                            NULL, error);
1089                         if (ret < 0)
1090                                 return ret;
1091                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1092                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1093                         if (items->mask != NULL &&
1094                             ((const struct rte_flow_item_ipv6 *)
1095                              items->mask)->hdr.proto) {
1096                                 next_protocol =
1097                                         ((const struct rte_flow_item_ipv6 *)
1098                                          items->spec)->hdr.proto;
1099                                 next_protocol &=
1100                                         ((const struct rte_flow_item_ipv6 *)
1101                                          items->mask)->hdr.proto;
1102                         } else {
1103                                 /* Reset for inner layer. */
1104                                 next_protocol = 0xff;
1105                         }
1106                         break;
1107                 case RTE_FLOW_ITEM_TYPE_UDP:
1108                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1109                                                           next_protocol,
1110                                                           error);
1111                         if (ret < 0)
1112                                 return ret;
1113                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1114                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1115                         break;
1116                 case RTE_FLOW_ITEM_TYPE_TCP:
1117                         ret = mlx5_flow_validate_item_tcp
1118                                                 (items, item_flags,
1119                                                  next_protocol,
1120                                                  &rte_flow_item_tcp_mask,
1121                                                  error);
1122                         if (ret < 0)
1123                                 return ret;
1124                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1125                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1126                         break;
1127                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1128                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1129                                                             error);
1130                         if (ret < 0)
1131                                 return ret;
1132                         last_item = MLX5_FLOW_LAYER_VXLAN;
1133                         break;
1134                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1135                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1136                                                                 item_flags,
1137                                                                 dev, error);
1138                         if (ret < 0)
1139                                 return ret;
1140                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1141                         break;
1142                 case RTE_FLOW_ITEM_TYPE_GRE:
1143                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1144                                                           next_protocol, error);
1145                         if (ret < 0)
1146                                 return ret;
1147                         last_item = MLX5_FLOW_LAYER_GRE;
1148                         break;
1149                 case RTE_FLOW_ITEM_TYPE_MPLS:
1150                         ret = mlx5_flow_validate_item_mpls(dev, items,
1151                                                            item_flags,
1152                                                            last_item, error);
1153                         if (ret < 0)
1154                                 return ret;
1155                         last_item = MLX5_FLOW_LAYER_MPLS;
1156                         break;
1157                 default:
1158                         return rte_flow_error_set(error, ENOTSUP,
1159                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1160                                                   NULL, "item not supported");
1161                 }
1162                 item_flags |= last_item;
1163         }
1164         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1165                 switch (actions->type) {
1166                 case RTE_FLOW_ACTION_TYPE_VOID:
1167                         break;
1168                 case RTE_FLOW_ACTION_TYPE_FLAG:
1169                         ret = mlx5_flow_validate_action_flag(action_flags,
1170                                                              attr,
1171                                                              error);
1172                         if (ret < 0)
1173                                 return ret;
1174                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1175                         break;
1176                 case RTE_FLOW_ACTION_TYPE_MARK:
1177                         ret = mlx5_flow_validate_action_mark(actions,
1178                                                              action_flags,
1179                                                              attr,
1180                                                              error);
1181                         if (ret < 0)
1182                                 return ret;
1183                         action_flags |= MLX5_FLOW_ACTION_MARK;
1184                         break;
1185                 case RTE_FLOW_ACTION_TYPE_DROP:
1186                         ret = mlx5_flow_validate_action_drop(action_flags,
1187                                                              attr,
1188                                                              error);
1189                         if (ret < 0)
1190                                 return ret;
1191                         action_flags |= MLX5_FLOW_ACTION_DROP;
1192                         break;
1193                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1194                         ret = mlx5_flow_validate_action_queue(actions,
1195                                                               action_flags, dev,
1196                                                               attr,
1197                                                               error);
1198                         if (ret < 0)
1199                                 return ret;
1200                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1201                         break;
1202                 case RTE_FLOW_ACTION_TYPE_RSS:
1203                         ret = mlx5_flow_validate_action_rss(actions,
1204                                                             action_flags, dev,
1205                                                             attr, item_flags,
1206                                                             error);
1207                         if (ret < 0)
1208                                 return ret;
1209                         action_flags |= MLX5_FLOW_ACTION_RSS;
1210                         break;
1211                 case RTE_FLOW_ACTION_TYPE_COUNT:
1212                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1213                         if (ret < 0)
1214                                 return ret;
1215                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1216                         break;
1217                 default:
1218                         return rte_flow_error_set(error, ENOTSUP,
1219                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1220                                                   actions,
1221                                                   "action not supported");
1222                 }
1223         }
1224         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1225                 return rte_flow_error_set(error, EINVAL,
1226                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1227                                           "no fate action is found");
1228         return 0;
1229 }
1230
1231 /**
1232  * Calculate the number of bytes needed for the action part of the Verbs
1233  * flow.
1234  *
1235  * @param[in] actions
1236  *   Pointer to the list of actions.
1237  *
1238  * @return
1239  *   The size of the memory needed for all actions.
1240  */
1241 static int
1242 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1243 {
1244         int size = 0;
1245
1246         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1247                 switch (actions->type) {
1248                 case RTE_FLOW_ACTION_TYPE_VOID:
1249                         break;
1250                 case RTE_FLOW_ACTION_TYPE_FLAG:
1251                         size += sizeof(struct ibv_flow_spec_action_tag);
1252                         break;
1253                 case RTE_FLOW_ACTION_TYPE_MARK:
1254                         size += sizeof(struct ibv_flow_spec_action_tag);
1255                         break;
1256                 case RTE_FLOW_ACTION_TYPE_DROP:
1257                         size += sizeof(struct ibv_flow_spec_action_drop);
1258                         break;
1259                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1260                         break;
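                        /*
                         * QUEUE (and RSS below) are applied through the Rx
                         * queue selection when the flow is created and add no
                         * Verbs spec.
                         */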
1261                 case RTE_FLOW_ACTION_TYPE_RSS:
1262                         break;
1263                 case RTE_FLOW_ACTION_TYPE_COUNT:
1264 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1265         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1266                         size += sizeof(struct ibv_flow_spec_counter_action);
1267 #endif
1268                         break;
1269                 default:
1270                         break;
1271                 }
1272         }
1273         return size;
1274 }
1275
1276 /**
1277  * Calculate the number of bytes needed for the item part of the Verbs
1278  * flow.
1279  *
1280  * @param[in] items
1281  *   Pointer to the list of items.
1282  *
1283  * @return
1284  *   The size of the memory needed for all items.
1285  */
1286 static int
1287 flow_verbs_get_items_size(const struct rte_flow_item items[])
1288 {
1289         int size = 0;
1290
1291         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1292                 switch (items->type) {
1293                 case RTE_FLOW_ITEM_TYPE_VOID:
1294                         break;
1295                 case RTE_FLOW_ITEM_TYPE_ETH:
1296                         size += sizeof(struct ibv_flow_spec_eth);
1297                         break;
1298                 case RTE_FLOW_ITEM_TYPE_VLAN:
1299                         size += sizeof(struct ibv_flow_spec_eth);
1300                         break;
1301                 case RTE_FLOW_ITEM_TYPE_IPV4:
1302                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1303                         break;
1304                 case RTE_FLOW_ITEM_TYPE_IPV6:
1305                         size += sizeof(struct ibv_flow_spec_ipv6);
1306                         break;
1307                 case RTE_FLOW_ITEM_TYPE_UDP:
1308                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1309                         break;
1310                 case RTE_FLOW_ITEM_TYPE_TCP:
1311                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1312                         break;
1313                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1314                         size += sizeof(struct ibv_flow_spec_tunnel);
1315                         break;
1316                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1317                         size += sizeof(struct ibv_flow_spec_tunnel);
1318                         break;
1319 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1320                 case RTE_FLOW_ITEM_TYPE_GRE:
1321                         size += sizeof(struct ibv_flow_spec_gre);
1322                         break;
1323                 case RTE_FLOW_ITEM_TYPE_MPLS:
1324                         size += sizeof(struct ibv_flow_spec_mpls);
1325                         break;
1326 #else
1327                 case RTE_FLOW_ITEM_TYPE_GRE:
1328                         size += sizeof(struct ibv_flow_spec_tunnel);
1329                         break;
1330 #endif
1331                 default:
1332                         break;
1333                 }
1334         }
1335         return size;
1336 }
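/*
 * Illustrative sketch, not part of the driver: for an ETH / IPV4 / UDP
 * pattern the loop above accumulates
 *   sizeof(struct ibv_flow_spec_eth) +
 *   sizeof(struct ibv_flow_spec_ipv4_ext) +
 *   sizeof(struct ibv_flow_spec_tcp_udp)
 * which, together with flow_verbs_get_actions_size(), gives the byte
 * count handed to rte_calloc() in flow_verbs_prepare() below.
 *
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   int spec_size = flow_verbs_get_items_size(pattern);
 */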
1337
1338 /**
1339  * Internal preparation function. Allocate mlx5_flow with the required size.
1340  * The required size is calculated from the given actions and items.
1342  *
1343  * @param[in] attr
1344  *   Pointer to the flow attributes.
1345  * @param[in] items
1346  *   Pointer to the list of items.
1347  * @param[in] actions
1348  *   Pointer to the list of actions.
1349  * @param[out] error
1350  *   Pointer to the error structure.
1351  *
1352  * @return
1353  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1354  *   is set.
1355  */
1356 static struct mlx5_flow *
1357 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1358                    const struct rte_flow_item items[],
1359                    const struct rte_flow_action actions[],
1360                    struct rte_flow_error *error)
1361 {
1362         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1363         struct mlx5_flow *flow;
1364
1365         size += flow_verbs_get_actions_size(actions);
1366         size += flow_verbs_get_items_size(items);
1367         flow = rte_calloc(__func__, 1, size, 0);
1368         if (!flow) {
1369                 rte_flow_error_set(error, ENOMEM,
1370                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1371                                    "not enough memory to create flow");
1372                 return NULL;
1373         }
1374         flow->verbs.attr = (void *)(flow + 1);
1375         flow->verbs.specs =
1376                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1377         return flow;
1378 }
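/*
 * Layout of the single allocation made above (a sketch of the pointer
 * arithmetic, not an additional structure in the driver):
 *
 *   +------------------+----------------------+------------------+
 *   | struct mlx5_flow | struct ibv_flow_attr | Verbs specs ...  |
 *   +------------------+----------------------+------------------+
 *   ^ flow             ^ flow->verbs.attr     ^ flow->verbs.specs
 *
 * The translation step appends each ibv_flow_spec_* into the trailing
 * specs area while updating attr->num_of_specs.
 */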
1379
1380 /**
1381  * Fill the flow with Verbs specs.
1382  *
1383  * @param[in] dev
1384  *   Pointer to Ethernet device.
1385  * @param[in, out] dev_flow
1386  *   Pointer to the mlx5 flow.
1387  * @param[in] attr
1388  *   Pointer to the flow attributes.
1389  * @param[in] items
1390  *   Pointer to the list of items.
1391  * @param[in] actions
1392  *   Pointer to the list of actions.
1393  * @param[out] error
1394  *   Pointer to the error structure.
1395  *
1396  * @return
1397  *   0 on success, a negative errno value otherwise and rte_errno is set.
1398  */
1399 static int
1400 flow_verbs_translate(struct rte_eth_dev *dev,
1401                      struct mlx5_flow *dev_flow,
1402                      const struct rte_flow_attr *attr,
1403                      const struct rte_flow_item items[],
1404                      const struct rte_flow_action actions[],
1405                      struct rte_flow_error *error)
1406 {
1407         struct rte_flow *flow = dev_flow->flow;
1408         uint64_t item_flags = 0;
1409         uint64_t action_flags = 0;
1410         uint64_t priority = attr->priority;
1411         uint32_t subpriority = 0;
1412         struct mlx5_priv *priv = dev->data->dev_private;
1413
1414         if (priority == MLX5_FLOW_PRIO_RSVD)
1415                 priority = priv->config.flow_prio - 1;
1416         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1417                 int ret;
1418
1419                 switch (actions->type) {
1420                 case RTE_FLOW_ACTION_TYPE_VOID:
1421                         break;
1422                 case RTE_FLOW_ACTION_TYPE_FLAG:
1423                         flow_verbs_translate_action_flag(dev_flow, actions);
1424                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1425                         break;
1426                 case RTE_FLOW_ACTION_TYPE_MARK:
1427                         flow_verbs_translate_action_mark(dev_flow, actions);
1428                         action_flags |= MLX5_FLOW_ACTION_MARK;
1429                         break;
1430                 case RTE_FLOW_ACTION_TYPE_DROP:
1431                         flow_verbs_translate_action_drop(dev_flow, actions);
1432                         action_flags |= MLX5_FLOW_ACTION_DROP;
1433                         break;
1434                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1435                         flow_verbs_translate_action_queue(dev_flow, actions);
1436                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1437                         break;
1438                 case RTE_FLOW_ACTION_TYPE_RSS:
1439                         flow_verbs_translate_action_rss(dev_flow, actions);
1440                         action_flags |= MLX5_FLOW_ACTION_RSS;
1441                         break;
1442                 case RTE_FLOW_ACTION_TYPE_COUNT:
1443                         ret = flow_verbs_translate_action_count(dev_flow,
1444                                                                 actions,
1445                                                                 dev, error);
1446                         if (ret < 0)
1447                                 return ret;
1448                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1449                         break;
1450                 default:
1451                         return rte_flow_error_set(error, ENOTSUP,
1452                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1453                                                   actions,
1454                                                   "action not supported");
1455                 }
1456         }
1457         flow->actions = action_flags;
1458         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1459                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1460
1461                 switch (items->type) {
1462                 case RTE_FLOW_ITEM_TYPE_VOID:
1463                         break;
1464                 case RTE_FLOW_ITEM_TYPE_ETH:
1465                         flow_verbs_translate_item_eth(dev_flow, items,
1466                                                       item_flags);
1467                         subpriority = MLX5_PRIORITY_MAP_L2;
1468                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1469                                                MLX5_FLOW_LAYER_OUTER_L2;
1470                         break;
1471                 case RTE_FLOW_ITEM_TYPE_VLAN:
1472                         flow_verbs_translate_item_vlan(dev_flow, items,
1473                                                        item_flags);
1474                         subpriority = MLX5_PRIORITY_MAP_L2;
1475                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1476                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1477                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1478                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1479                         break;
1480                 case RTE_FLOW_ITEM_TYPE_IPV4:
1481                         flow_verbs_translate_item_ipv4(dev_flow, items,
1482                                                        item_flags);
1483                         subpriority = MLX5_PRIORITY_MAP_L3;
1484                         dev_flow->verbs.hash_fields |=
1485                                 mlx5_flow_hashfields_adjust
1486                                         (dev_flow, tunnel,
1487                                          MLX5_IPV4_LAYER_TYPES,
1488                                          MLX5_IPV4_IBV_RX_HASH);
1489                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1490                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1491                         break;
1492                 case RTE_FLOW_ITEM_TYPE_IPV6:
1493                         flow_verbs_translate_item_ipv6(dev_flow, items,
1494                                                        item_flags);
1495                         subpriority = MLX5_PRIORITY_MAP_L3;
1496                         dev_flow->verbs.hash_fields |=
1497                                 mlx5_flow_hashfields_adjust
1498                                         (dev_flow, tunnel,
1499                                          MLX5_IPV6_LAYER_TYPES,
1500                                          MLX5_IPV6_IBV_RX_HASH);
1501                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1502                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1503                         break;
1504                 case RTE_FLOW_ITEM_TYPE_TCP:
1505                         flow_verbs_translate_item_tcp(dev_flow, items,
1506                                                       item_flags);
1507                         subpriority = MLX5_PRIORITY_MAP_L4;
1508                         dev_flow->verbs.hash_fields |=
1509                                 mlx5_flow_hashfields_adjust
1510                                         (dev_flow, tunnel, ETH_RSS_TCP,
1511                                          (IBV_RX_HASH_SRC_PORT_TCP |
1512                                           IBV_RX_HASH_DST_PORT_TCP));
1513                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1514                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1515                         break;
1516                 case RTE_FLOW_ITEM_TYPE_UDP:
1517                         flow_verbs_translate_item_udp(dev_flow, items,
1518                                                       item_flags);
1519                         subpriority = MLX5_PRIORITY_MAP_L4;
1520                         dev_flow->verbs.hash_fields |=
1521                                 mlx5_flow_hashfields_adjust
1522                                         (dev_flow, tunnel, ETH_RSS_UDP,
1523                                          (IBV_RX_HASH_SRC_PORT_UDP |
1524                                           IBV_RX_HASH_DST_PORT_UDP));
1525                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1526                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1527                         break;
1528                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1529                         flow_verbs_translate_item_vxlan(dev_flow, items,
1530                                                         item_flags);
1531                         subpriority = MLX5_PRIORITY_MAP_L2;
1532                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1533                         break;
1534                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1535                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1536                                                             item_flags);
1537                         subpriority = MLX5_PRIORITY_MAP_L2;
1538                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1539                         break;
1540                 case RTE_FLOW_ITEM_TYPE_GRE:
1541                         flow_verbs_translate_item_gre(dev_flow, items,
1542                                                       item_flags);
1543                         subpriority = MLX5_PRIORITY_MAP_L2;
1544                         item_flags |= MLX5_FLOW_LAYER_GRE;
1545                         break;
1546                 case RTE_FLOW_ITEM_TYPE_MPLS:
1547                         flow_verbs_translate_item_mpls(dev_flow, items,
1548                                                        item_flags);
1549                         subpriority = MLX5_PRIORITY_MAP_L2;
1550                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1551                         break;
1552                 default:
1553                         return rte_flow_error_set(error, ENOTSUP,
1554                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1555                                                   NULL,
1556                                                   "item not supported");
1557                 }
1558         }
1559         dev_flow->layers = item_flags;
1560         dev_flow->verbs.attr->priority =
1561                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1562         dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
1563         return 0;
1564 }
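/*
 * Worked example of the loop above (a description, nothing is added to
 * the driver): with attr->priority == MLX5_FLOW_PRIO_RSVD, a single RSS
 * action and an ETH / IPV4 / UDP pattern, translation ends with
 *   - flow->actions     == MLX5_FLOW_ACTION_RSS,
 *   - dev_flow->layers  == MLX5_FLOW_LAYER_OUTER_L2 |
 *                          MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                          MLX5_FLOW_LAYER_OUTER_L4_UDP,
 *   - verbs.hash_fields holding the IPv4 and UDP port hash bits,
 *   - subpriority       == MLX5_PRIORITY_MAP_L4,
 * and the final attr->priority comes from mlx5_flow_adjust_priority()
 * fed with priv->config.flow_prio - 1 and that subpriority.
 */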
1565
1566 /**
1567  * Remove the flow from the NIC but keep it in memory.
1568  *
1569  * @param[in] dev
1570  *   Pointer to the Ethernet device structure.
1571  * @param[in, out] flow
1572  *   Pointer to flow structure.
1573  */
1574 static void
1575 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1576 {
1577         struct mlx5_flow_verbs *verbs;
1578         struct mlx5_flow *dev_flow;
1579
1580         if (!flow)
1581                 return;
1582         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1583                 verbs = &dev_flow->verbs;
1584                 if (verbs->flow) {
1585                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1586                         verbs->flow = NULL;
1587                 }
1588                 if (verbs->hrxq) {
1589                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1590                                 mlx5_hrxq_drop_release(dev);
1591                         else
1592                                 mlx5_hrxq_release(dev, verbs->hrxq);
1593                         verbs->hrxq = NULL;
1594                 }
1595         }
1596 }
1597
1598 /**
1599  * Remove the flow from the NIC and the memory.
1600  *
1601  * @param[in] dev
1602  *   Pointer to the Ethernet device structure.
1603  * @param[in, out] flow
1604  *   Pointer to flow structure.
1605  */
1606 static void
1607 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1608 {
1609         struct mlx5_flow *dev_flow;
1610
1611         if (!flow)
1612                 return;
1613         flow_verbs_remove(dev, flow);
1614         while (!LIST_EMPTY(&flow->dev_flows)) {
1615                 dev_flow = LIST_FIRST(&flow->dev_flows);
1616                 LIST_REMOVE(dev_flow, next);
1617                 rte_free(dev_flow);
1618         }
1619         if (flow->counter) {
1620                 flow_verbs_counter_release(dev, flow->counter);
1621                 flow->counter = NULL;
1622         }
1623 }
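/*
 * The remove()/destroy() split exists so a rule can be detached from
 * the NIC and re-applied later without being re-translated.  A minimal
 * sketch of a port stop/start cycle using it (the priv->flows list and
 * the loops are assumptions about the generic layer, shown only for
 * illustration):
 *
 *   struct rte_flow *flow;
 *   struct rte_flow_error error;
 *
 *   TAILQ_FOREACH(flow, &priv->flows, next)
 *           flow_verbs_remove(dev, flow);                // port stop
 *   ...
 *   TAILQ_FOREACH(flow, &priv->flows, next)
 *           if (flow_verbs_apply(dev, flow, &error))     // port start
 *                   break;
 */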
1624
1625 /**
1626  * Apply the flow to the NIC.
1627  *
1628  * @param[in] dev
1629  *   Pointer to the Ethernet device structure.
1630  * @param[in, out] flow
1631  *   Pointer to flow structure.
1632  * @param[out] error
1633  *   Pointer to error structure.
1634  *
1635  * @return
1636  *   0 on success, a negative errno value otherwise and rte_errno is set.
1637  */
1638 static int
1639 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1640                  struct rte_flow_error *error)
1641 {
1642         struct mlx5_flow_verbs *verbs;
1643         struct mlx5_flow *dev_flow;
1644         int err;
1645
1646         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1647                 verbs = &dev_flow->verbs;
1648                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1649                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1650                         if (!verbs->hrxq) {
1651                                 rte_flow_error_set
1652                                         (error, errno,
1653                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1654                                          "cannot get drop hash queue");
1655                                 goto error;
1656                         }
1657                 } else {
1658                         struct mlx5_hrxq *hrxq;
1659
1660                         hrxq = mlx5_hrxq_get(dev, flow->key,
1661                                              MLX5_RSS_HASH_KEY_LEN,
1662                                              verbs->hash_fields,
1663                                              (*flow->queue),
1664                                              flow->rss.queue_num);
1665                         if (!hrxq)
1666                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1667                                                      MLX5_RSS_HASH_KEY_LEN,
1668                                                      verbs->hash_fields,
1669                                                      (*flow->queue),
1670                                                      flow->rss.queue_num,
1671                                                      !!(dev_flow->layers &
1672                                                       MLX5_FLOW_LAYER_TUNNEL));
1673                         if (!hrxq) {
1674                                 rte_flow_error_set
1675                                         (error, rte_errno,
1676                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1677                                          "cannot get hash queue");
1678                                 goto error;
1679                         }
1680                         verbs->hrxq = hrxq;
1681                 }
1682                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1683                                                      verbs->attr);
1684                 if (!verbs->flow) {
1685                         rte_flow_error_set(error, errno,
1686                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1687                                            NULL,
1688                                            "hardware refuses to create flow");
1689                         goto error;
1690                 }
1691         }
1692         return 0;
1693 error:
1694         err = rte_errno; /* Save rte_errno before cleanup. */
1695         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1696                 verbs = &dev_flow->verbs;
1697                 if (verbs->hrxq) {
1698                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1699                                 mlx5_hrxq_drop_release(dev);
1700                         else
1701                                 mlx5_hrxq_release(dev, verbs->hrxq);
1702                         verbs->hrxq = NULL;
1703                 }
1704         }
1705         rte_errno = err; /* Restore rte_errno. */
1706         return -rte_errno;
1707 }
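/*
 * The unwind above follows the usual idiom of preserving the first
 * failure cause across cleanup calls that may themselves overwrite
 * rte_errno; a minimal sketch of the same pattern (the cleanup name is
 * illustrative):
 *
 *   error:
 *           err = rte_errno;                // save the original cause
 *           cleanup_that_may_set_errno();
 *           rte_errno = err;                // report the first failure
 *           return -rte_errno;
 */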
1708
1709 /**
1710  * Query a flow.
1711  *
1712  * @see rte_flow_query()
1713  * @see rte_flow_ops
1714  */
1715 static int
1716 flow_verbs_query(struct rte_eth_dev *dev,
1717                  struct rte_flow *flow,
1718                  const struct rte_flow_action *actions,
1719                  void *data,
1720                  struct rte_flow_error *error)
1721 {
1722         int ret = -EINVAL;
1723
1724         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1725                 switch (actions->type) {
1726                 case RTE_FLOW_ACTION_TYPE_VOID:
1727                         break;
1728                 case RTE_FLOW_ACTION_TYPE_COUNT:
1729                         ret = flow_verbs_counter_query(dev, flow, data, error);
1730                         break;
1731                 default:
1732                         return rte_flow_error_set(error, ENOTSUP,
1733                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1734                                                   actions,
1735                                                   "action not supported");
1736                 }
1737         }
1738         return ret;
1739 }
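/*
 * A hypothetical caller-side sketch of how this handler is reached
 * through the public rte_flow API (only the rte_flow symbols are real;
 * port_id and flow come from the application):
 *
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_query_count count = { .reset = 0 };
 *   struct rte_flow_error error;
 *
 *   if (rte_flow_query(port_id, flow, actions, &count, &error) == 0)
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  count.hits, count.bytes);
 */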
1740
1741 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1742         .validate = flow_verbs_validate,
1743         .prepare = flow_verbs_prepare,
1744         .translate = flow_verbs_translate,
1745         .apply = flow_verbs_apply,
1746         .remove = flow_verbs_remove,
1747         .destroy = flow_verbs_destroy,
1748         .query = flow_verbs_query,
1749 };
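/*
 * The generic mlx5 flow layer dispatches through this table; a sketch
 * of the expected call order for one rule (driver selection and error
 * handling are simplified, ops stands for &mlx5_flow_verbs_drv_ops):
 *
 *   ops->validate(dev, attr, items, actions, &error);
 *   dev_flow = ops->prepare(attr, items, actions, &error);
 *   ops->translate(dev, dev_flow, attr, items, actions, &error);
 *   ops->apply(dev, flow, &error);      // attach to the NIC
 *   ...
 *   ops->remove(dev, flow);             // detach, keep in memory
 *   ops->destroy(dev, flow);            // free device flows and counter
 */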