net/mlx5: add abstraction for multiple flow drivers
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
36 /**
37  * Get a flow counter.
38  *
39  * @param[in] dev
40  *   Pointer to the Ethernet device structure.
41  * @param[in] shared
42  *   Indicate if this counter is shared with other flows.
43  * @param[in] id
44  *   Counter identifier.
45  *
46  * @return
47  *   A pointer to the counter, NULL otherwise and rte_errno is set.
48  */
49 static struct mlx5_flow_counter *
50 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
51 {
52         struct priv *priv = dev->data->dev_private;
53         struct mlx5_flow_counter *cnt;
54
55         LIST_FOREACH(cnt, &priv->flow_counters, next) {
56                 if (!cnt->shared || cnt->shared != shared)
57                         continue;
58                 if (cnt->id != id)
59                         continue;
60                 cnt->ref_cnt++;
61                 return cnt;
62         }
63 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
64
65         struct mlx5_flow_counter tmpl = {
66                 .shared = shared,
67                 .id = id,
                   .ref_cnt = 1, /* hold the first reference for the caller */
68                 .cs = mlx5_glue->create_counter_set
69                         (priv->ctx,
70                          &(struct ibv_counter_set_init_attr){
71                                  .counter_set_id = id,
72                          }),
73                 .hits = 0,
74                 .bytes = 0,
75         };
76
77         if (!tmpl.cs) {
78                 rte_errno = errno;
79                 return NULL;
80         }
81         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
82         if (!cnt) {
83                 rte_errno = ENOMEM;
84                 return NULL;
85         }
86         *cnt = tmpl;
87         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
88         return cnt;
89 #endif
90         rte_errno = ENOTSUP;
91         return NULL;
92 }
93
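/*
 * Editorial note (not part of the upstream comments): the lookup above only
 * reuses counters that were created as shared and carry the same user id; a
 * non-shared COUNT action always allocates a fresh ibv_counter_set, and each
 * user of a counter is expected to balance it with flow_verbs_counter_release().
 */
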
94 /**
95  * Release a flow counter.
96  *
97  * @param[in] counter
98  *   Pointer to the counter handle.
99  */
100 static void
101 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
102 {
103         if (--counter->ref_cnt == 0) {
104                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
105                 LIST_REMOVE(counter, next);
106                 rte_free(counter);
107         }
108 }
109
110 /**
111  * Add a verbs item specification into @p flow.
112  *
113  * @param[in, out] flow
114  *   Pointer to flow structure.
115  * @param[in] src
116  *   Pointer to the specification to copy into the flow.
117  * @param[in] size
118  *   Size in bytes of the specification to copy.
119  */
120 static void
121 flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
122 {
123         struct mlx5_flow_verbs *verbs = &flow->verbs;
124
125         if (verbs->specs) {
126                 void *dst;
127
128                 dst = (void *)(verbs->specs + verbs->size);
129                 memcpy(dst, src, size);
130                 ++verbs->attr->num_of_specs;
131         }
132         verbs->size += size;
133 }
134
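/*
 * Editorial sketch of the buffer filled by flow_verbs_spec_add() (layout as
 * allocated by flow_verbs_prepare() below):
 *
 *   verbs->attr                  verbs->specs
 *   |                            |
 *   v                            v
 *   +----------------------------+---------+---------+-----
 *   |    struct ibv_flow_attr    | spec #0 | spec #1 | ...
 *   +----------------------------+---------+---------+-----
 *
 * Every spec starts with a struct ibv_spec_header, which is how the update
 * helpers (VLAN, GRE, mark) walk the array using hdr->size.  When
 * verbs->specs is NULL the spec is not copied and only verbs->size grows.
 */
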
135 /**
136  * Convert the @p item into a Verbs specification. This function assumes that
137  * the input is valid and that there is space to insert the requested item
138  * into the flow.
139  *
140  * @param[in] item
141  *   Item specification.
142  * @param[in, out] item_flags
143  *   Bit mask that marks all detected items.
144  * @param[in, out] dev_flow
145  *   Pointer to dev_flow structure.
146  */
147 static void
148 flow_verbs_translate_item_eth(const struct rte_flow_item *item,
149                               uint64_t *item_flags,
150                               struct mlx5_flow *dev_flow)
151 {
152         const struct rte_flow_item_eth *spec = item->spec;
153         const struct rte_flow_item_eth *mask = item->mask;
154         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
155         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
156         struct ibv_flow_spec_eth eth = {
157                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
158                 .size = size,
159         };
160
161         if (!mask)
162                 mask = &rte_flow_item_eth_mask;
163         if (spec) {
164                 unsigned int i;
165
166                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
167                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
168                 eth.val.ether_type = spec->type;
169                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
170                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
171                 eth.mask.ether_type = mask->type;
172                 /* Remove unwanted bits from values. */
173                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
174                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
175                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
176                 }
177                 eth.val.ether_type &= eth.mask.ether_type;
178                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
179         }
180         flow_verbs_spec_add(dev_flow, &eth, size);
181         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
182                                 MLX5_FLOW_LAYER_OUTER_L2;
183 }
184
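/*
 * Usage sketch (editorial, not compiled as part of the driver): a pattern
 * item such as
 *
 *   struct rte_flow_item_eth eth_spec = { .type = RTE_BE16(ETHER_TYPE_IPv4) };
 *   struct rte_flow_item_eth eth_mask = { .type = RTE_BE16(0xffff) };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &eth_mask,
 *   };
 *
 * is turned by flow_verbs_translate_item_eth() into a single IBV_FLOW_SPEC_ETH
 * entry whose masked ether_type is the only non-zero match field.
 */
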
185 /**
186  * Update the VLAN tag in the Verbs Ethernet specification.
187  * This function assumes that the input is valid and there is space to add
188  * the requested item.
189  *
190  * @param[in, out] attr
191  *   Pointer to Verbs attributes structure.
192  * @param[in] eth
193  *   Verbs structure containing the VLAN information to copy.
194  */
195 static void
196 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
197                             struct ibv_flow_spec_eth *eth)
198 {
199         unsigned int i;
200         const enum ibv_flow_spec_type search = eth->type;
201         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
202                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
203
204         for (i = 0; i != attr->num_of_specs; ++i) {
205                 if (hdr->type == search) {
206                         struct ibv_flow_spec_eth *e =
207                                 (struct ibv_flow_spec_eth *)hdr;
208
209                         e->val.vlan_tag = eth->val.vlan_tag;
210                         e->mask.vlan_tag = eth->mask.vlan_tag;
211                         e->val.ether_type = eth->val.ether_type;
212                         e->mask.ether_type = eth->mask.ether_type;
213                         break;
214                 }
215                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
216         }
217 }
218
219 /**
220  * Convert the @p item into a Verbs specification. This function assumes that
221  * the input is valid and that there is space to insert the requested item
222  * into the flow.
223  *
224  * @param[in] item
225  *   Item specification.
226  * @param[in, out] item_flags
227  *   Bit mask that holds all detected items.
228  * @param[in, out] dev_flow
229  *   Pointer to dev_flow structure.
230  */
231 static void
232 flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
233                                uint64_t *item_flags,
234                                struct mlx5_flow *dev_flow)
235 {
236         const struct rte_flow_item_vlan *spec = item->spec;
237         const struct rte_flow_item_vlan *mask = item->mask;
238         unsigned int size = sizeof(struct ibv_flow_spec_eth);
239         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
240         struct ibv_flow_spec_eth eth = {
241                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
242                 .size = size,
243         };
244         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
245                                       MLX5_FLOW_LAYER_OUTER_L2;
246
247         if (!mask)
248                 mask = &rte_flow_item_vlan_mask;
249         if (spec) {
250                 eth.val.vlan_tag = spec->tci;
251                 eth.mask.vlan_tag = mask->tci;
252                 eth.val.vlan_tag &= eth.mask.vlan_tag;
253                 eth.val.ether_type = spec->inner_type;
254                 eth.mask.ether_type = mask->inner_type;
255                 eth.val.ether_type &= eth.mask.ether_type;
256         }
257         if (!(*item_flags & l2m)) {
258                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
259                 flow_verbs_spec_add(dev_flow, &eth, size);
260         } else {
261                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
262                 size = 0; /* Only an update is done in eth specification. */
263         }
264         *item_flags |= tunnel ?
265                        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
266                        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
267 }
268
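/*
 * Editorial note: Verbs has no standalone VLAN specification; the TCI and
 * inner EtherType live inside ibv_flow_spec_eth.  A VLAN item therefore
 * either appends a fresh ETH spec (when no L2 item was seen yet) or, via
 * flow_verbs_item_vlan_update(), patches the vlan_tag/ether_type fields of
 * the ETH spec already present in the attribute buffer.
 */
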
269 /**
270  * Convert the @p item into a Verbs specification. This function assumes that
271  * the input is valid and that there is space to insert the requested item
272  * into the flow.
273  *
274  * @param[in] item
275  *   Item specification.
276  * @param[in, out] item_flags
277  *   Bit mask that marks all detected items.
278  * @param[in, out] dev_flow
279  *   Pointer to dev_flow structure.
280  */
281 static void
282 flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
283                                uint64_t *item_flags,
284                                struct mlx5_flow *dev_flow)
285 {
286         const struct rte_flow_item_ipv4 *spec = item->spec;
287         const struct rte_flow_item_ipv4 *mask = item->mask;
288         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
289         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
290         struct ibv_flow_spec_ipv4_ext ipv4 = {
291                 .type = IBV_FLOW_SPEC_IPV4_EXT |
292                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
293                 .size = size,
294         };
295
296         if (!mask)
297                 mask = &rte_flow_item_ipv4_mask;
298         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
299                                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
300         if (spec) {
301                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
302                         .src_ip = spec->hdr.src_addr,
303                         .dst_ip = spec->hdr.dst_addr,
304                         .proto = spec->hdr.next_proto_id,
305                         .tos = spec->hdr.type_of_service,
306                 };
307                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
308                         .src_ip = mask->hdr.src_addr,
309                         .dst_ip = mask->hdr.dst_addr,
310                         .proto = mask->hdr.next_proto_id,
311                         .tos = mask->hdr.type_of_service,
312                 };
313                 /* Remove unwanted bits from values. */
314                 ipv4.val.src_ip &= ipv4.mask.src_ip;
315                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
316                 ipv4.val.proto &= ipv4.mask.proto;
317                 ipv4.val.tos &= ipv4.mask.tos;
318         }
319         dev_flow->verbs.hash_fields |=
320                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
321                                             MLX5_IPV4_LAYER_TYPES,
322                                             MLX5_IPV4_IBV_RX_HASH);
323         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
324         flow_verbs_spec_add(dev_flow, &ipv4, size);
325 }
326
327 /**
328  * Convert the @p item into a Verbs specification. This function assumes that
329  * the input is valid and that there is space to insert the requested item
330  * into the flow.
331  *
332  * @param[in] item
333  *   Item specification.
334  * @param[in, out] item_flags
335  *   Bit mask that marks all detected items.
336  * @param[in, out] dev_flow
337  *   Pointer to dev_flow structure.
338  */
339 static void
340 flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
341                                uint64_t *item_flags,
342                                struct mlx5_flow *dev_flow)
343 {
344         const struct rte_flow_item_ipv6 *spec = item->spec;
345         const struct rte_flow_item_ipv6 *mask = item->mask;
346         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
347         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
348         struct ibv_flow_spec_ipv6 ipv6 = {
349                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
350                 .size = size,
351         };
352
353         if (!mask)
354                 mask = &rte_flow_item_ipv6_mask;
355         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
356                                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
357         if (spec) {
358                 unsigned int i;
359                 uint32_t vtc_flow_val;
360                 uint32_t vtc_flow_mask;
361
362                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
363                        RTE_DIM(ipv6.val.src_ip));
364                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
365                        RTE_DIM(ipv6.val.dst_ip));
366                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
367                        RTE_DIM(ipv6.mask.src_ip));
368                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
369                        RTE_DIM(ipv6.mask.dst_ip));
370                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
371                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
372                 ipv6.val.flow_label =
373                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
374                                          IPV6_HDR_FL_SHIFT);
375                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
376                                          IPV6_HDR_TC_SHIFT;
377                 ipv6.val.next_hdr = spec->hdr.proto;
378                 ipv6.val.hop_limit = spec->hdr.hop_limits;
379                 ipv6.mask.flow_label =
380                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
381                                          IPV6_HDR_FL_SHIFT);
382                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
383                                           IPV6_HDR_TC_SHIFT;
384                 ipv6.mask.next_hdr = mask->hdr.proto;
385                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
386                 /* Remove unwanted bits from values. */
387                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
388                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
389                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
390                 }
391                 ipv6.val.flow_label &= ipv6.mask.flow_label;
392                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
393                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
394                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
395         }
396         dev_flow->verbs.hash_fields |=
397                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
398                                             MLX5_IPV6_LAYER_TYPES,
399                                             MLX5_IPV6_IBV_RX_HASH);
400         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
401         flow_verbs_spec_add(dev_flow, &ipv6, size);
402 }
403
404 /**
405  * Convert the @p item into a Verbs specification. This function assumes that
406  * the input is valid and that there is space to insert the requested item
407  * into the flow.
408  *
409  * @param[in] item
410  *   Item specification.
411  * @param[in, out] item_flags
412  *   Bit mask that marks all detected items.
413  * @param[in, out] dev_flow
414  *   Pointer to dev_flow structure.
415  */
416 static void
417 flow_verbs_translate_item_udp(const struct rte_flow_item *item,
418                               uint64_t *item_flags,
419                               struct mlx5_flow *dev_flow)
420 {
421         const struct rte_flow_item_udp *spec = item->spec;
422         const struct rte_flow_item_udp *mask = item->mask;
423         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
424         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
425         struct ibv_flow_spec_tcp_udp udp = {
426                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
427                 .size = size,
428         };
429
430         if (!mask)
431                 mask = &rte_flow_item_udp_mask;
432         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
433                                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
434         if (spec) {
435                 udp.val.dst_port = spec->hdr.dst_port;
436                 udp.val.src_port = spec->hdr.src_port;
437                 udp.mask.dst_port = mask->hdr.dst_port;
438                 udp.mask.src_port = mask->hdr.src_port;
439                 /* Remove unwanted bits from values. */
440                 udp.val.src_port &= udp.mask.src_port;
441                 udp.val.dst_port &= udp.mask.dst_port;
442         }
443         dev_flow->verbs.hash_fields |=
444                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
445                                             (IBV_RX_HASH_SRC_PORT_UDP |
446                                              IBV_RX_HASH_DST_PORT_UDP));
447         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
448         flow_verbs_spec_add(dev_flow, &udp, size);
449 }
450
451 /**
452  * Convert the @p item into a Verbs specification. This function assumes that
453  * the input is valid and that there is space to insert the requested item
454  * into the flow.
455  *
456  * @param[in] item
457  *   Item specification.
458  * @param[in, out] item_flags
459  *   Bit mask that marks all detected items.
460  * @param[in, out] dev_flow
461  *   Pointer to dev_flow structure.
462  */
463 static void
464 flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
465                               uint64_t *item_flags,
466                               struct mlx5_flow *dev_flow)
467 {
468         const struct rte_flow_item_tcp *spec = item->spec;
469         const struct rte_flow_item_tcp *mask = item->mask;
470         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
471         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
472         struct ibv_flow_spec_tcp_udp tcp = {
473                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
474                 .size = size,
475         };
476
477         if (!mask)
478                 mask = &rte_flow_item_tcp_mask;
479         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
480                                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
481         if (spec) {
482                 tcp.val.dst_port = spec->hdr.dst_port;
483                 tcp.val.src_port = spec->hdr.src_port;
484                 tcp.mask.dst_port = mask->hdr.dst_port;
485                 tcp.mask.src_port = mask->hdr.src_port;
486                 /* Remove unwanted bits from values. */
487                 tcp.val.src_port &= tcp.mask.src_port;
488                 tcp.val.dst_port &= tcp.mask.dst_port;
489         }
490         dev_flow->verbs.hash_fields |=
491                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
492                                             (IBV_RX_HASH_SRC_PORT_TCP |
493                                              IBV_RX_HASH_DST_PORT_TCP));
494         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
495         flow_verbs_spec_add(dev_flow, &tcp, size);
496 }
497
498 /**
499  * Convert the @p item into a Verbs specification. This function assumes that
500  * the input is valid and that there is space to insert the requested item
501  * into the flow.
502  *
503  * @param[in] item
504  *   Item specification.
505  * @param[in, out] item_flags
506  *   Bit mask that marks all detected items.
507  * @param[in, out] dev_flow
508  *   Pointer to dev_flow structure.
509  */
510 static void
511 flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
512                                 uint64_t *item_flags,
513                                 struct mlx5_flow *dev_flow)
514 {
515         const struct rte_flow_item_vxlan *spec = item->spec;
516         const struct rte_flow_item_vxlan *mask = item->mask;
517         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
518         struct ibv_flow_spec_tunnel vxlan = {
519                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
520                 .size = size,
521         };
522         union vni {
523                 uint32_t vlan_id;
524                 uint8_t vni[4];
525         } id = { .vlan_id = 0, };
526
527         if (!mask)
528                 mask = &rte_flow_item_vxlan_mask;
529         if (spec) {
530                 memcpy(&id.vni[1], spec->vni, 3);
531                 vxlan.val.tunnel_id = id.vlan_id;
532                 memcpy(&id.vni[1], mask->vni, 3);
533                 vxlan.mask.tunnel_id = id.vlan_id;
534                 /* Remove unwanted bits from values. */
535                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
536         }
537         flow_verbs_spec_add(dev_flow, &vxlan, size);
538         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
539         *item_flags |= MLX5_FLOW_LAYER_VXLAN;
540 }
541
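/*
 * Editorial note: the VXLAN VNI is only 24 bits wide, so the three VNI bytes
 * are copied into bytes 1..3 of the 32-bit big-endian tunnel_id while byte 0
 * stays clear; the VXLAN-GPE translation below uses the same trick.
 */
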
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in] item
548  *   Item specification.
549  * @param[in, out] item_flags
550  *   Bit mask that marks all detected items.
551  * @param[in, out] dev_flow
552  *   Pointer to dev_flow structure.
553  */
554 static void
555 flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
556                                     uint64_t *item_flags,
557                                     struct mlx5_flow *dev_flow)
558 {
559         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
560         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
561         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
562         struct ibv_flow_spec_tunnel vxlan_gpe = {
563                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
564                 .size = size,
565         };
566         union vni {
567                 uint32_t vlan_id;
568                 uint8_t vni[4];
569         } id = { .vlan_id = 0, };
570
571         if (!mask)
572                 mask = &rte_flow_item_vxlan_gpe_mask;
573         if (spec) {
574                 memcpy(&id.vni[1], spec->vni, 3);
575                 vxlan_gpe.val.tunnel_id = id.vlan_id;
576                 memcpy(&id.vni[1], mask->vni, 3);
577                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
578                 /* Remove unwanted bits from values. */
579                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
580         }
581         flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
582         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
583         *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
584 }
585
586 /**
587  * Update the protocol in Verbs IPv4/IPv6 spec.
588  *
589  * @param[in, out] attr
590  *   Pointer to Verbs attributes structure.
591  * @param[in] search
592  *   Specification type to search in order to update the IP protocol.
593  * @param[in] protocol
594  *   Protocol value to set if none is present in the specification.
595  */
596 static void
597 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
598                                        enum ibv_flow_spec_type search,
599                                        uint8_t protocol)
600 {
601         unsigned int i;
602         struct ibv_spec_header *hdr;
603
604         if (!attr)
605                 return;
606         hdr = (void *)((uint8_t *)attr + sizeof(struct ibv_flow_attr));
607         for (i = 0; i != attr->num_of_specs; ++i) {
608                 if (hdr->type == search) {
609                         union {
610                                 struct ibv_flow_spec_ipv4_ext *ipv4;
611                                 struct ibv_flow_spec_ipv6 *ipv6;
612                         } ip;
613
614                         switch (search) {
615                         case IBV_FLOW_SPEC_IPV4_EXT:
616                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
617                                 if (!ip.ipv4->val.proto) {
618                                         ip.ipv4->val.proto = protocol;
619                                         ip.ipv4->mask.proto = 0xff;
620                                 }
621                                 break;
622                         case IBV_FLOW_SPEC_IPV6:
623                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
624                                 if (!ip.ipv6->val.next_hdr) {
625                                         ip.ipv6->val.next_hdr = protocol;
626                                         ip.ipv6->mask.next_hdr = 0xff;
627                                 }
628                                 break;
629                         default:
630                                 break;
631                         }
632                         break;
633                 }
634                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
635         }
636 }
637
638 /**
639  * Convert the @p item into a Verbs specification. This function assumes that
640  * the input is valid and that there is space to insert the requested item
641  * into the flow.
642  *
643  * @param[in] item
644  *   Item specification.
645  * @param[in, out] item_flags
646  *   Bit mask that marks all detected items.
647  * @param[in, out] dev_flow
648  *   Pointer to dev_flow structure.
649  */
650 static void
651 flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
652                               uint64_t *item_flags,
653                               struct mlx5_flow *dev_flow)
654 {
655         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
656 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
657         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
658         struct ibv_flow_spec_tunnel tunnel = {
659                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
660                 .size = size,
661         };
662 #else
663         const struct rte_flow_item_gre *spec = item->spec;
664         const struct rte_flow_item_gre *mask = item->mask;
665         unsigned int size = sizeof(struct ibv_flow_spec_gre);
666         struct ibv_flow_spec_gre tunnel = {
667                 .type = IBV_FLOW_SPEC_GRE,
668                 .size = size,
669         };
670
671         if (!mask)
672                 mask = &rte_flow_item_gre_mask;
673         if (spec) {
674                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
675                 tunnel.val.protocol = spec->protocol;
676                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
677                 tunnel.mask.protocol = mask->protocol;
678                 /* Remove unwanted bits from values. */
679                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
680                 tunnel.val.protocol &= tunnel.mask.protocol;
681                 tunnel.val.key &= tunnel.mask.key;
682         }
683 #endif
684         if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
685                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
686                                                        IBV_FLOW_SPEC_IPV4_EXT,
687                                                        IPPROTO_GRE);
688         else
689                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
690                                                        IBV_FLOW_SPEC_IPV6,
691                                                        IPPROTO_GRE);
692         flow_verbs_spec_add(dev_flow, &tunnel, size);
693         verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
694         *item_flags |= MLX5_FLOW_LAYER_GRE;
695 }
696
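/*
 * Editorial note: when rdma-core lacks ibv_flow_spec_gre (i.e.
 * HAVE_IBV_DEVICE_MPLS_SUPPORT is not defined) GRE is matched with a generic
 * IBV_FLOW_SPEC_VXLAN_TUNNEL spec, so only the presence of a tunnel is
 * matched and the GRE header fields are ignored.  In both cases the helper
 * above forces the IP protocol of the preceding L3 spec to IPPROTO_GRE when
 * the application did not match on it explicitly.
 */
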
697 /**
698  * Convert the @p item into a Verbs specification. This function assumes that
699  * the input is valid and that there is space to insert the requested item
700  * into the flow.
701  *
702  * @param[in] item
703  *   Item specification.
704  * @param[in, out] item_flags
705  *   Bit mask that marks all detected items.
706  * @param[in, out] dev_flow
707  *   Pointer to dev_flow structure.
708  */
709 static void
710 flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
711                                uint64_t *item_flags __rte_unused,
712                                struct mlx5_flow *dev_flow __rte_unused)
713 {
714 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
715         const struct rte_flow_item_mpls *spec = item->spec;
716         const struct rte_flow_item_mpls *mask = item->mask;
717         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
718         struct ibv_flow_spec_mpls mpls = {
719                 .type = IBV_FLOW_SPEC_MPLS,
720                 .size = size,
721         };
722
723         if (!mask)
724                 mask = &rte_flow_item_mpls_mask;
725         if (spec) {
726                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
727                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
728                 /* Remove unwanted bits from values.  */
729                 mpls.val.label &= mpls.mask.label;
730         }
731         flow_verbs_spec_add(dev_flow, &mpls, size);
732         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
733         *item_flags |= MLX5_FLOW_LAYER_MPLS;
734 #endif
735 }
736
737 /**
738  * Convert the @p action into a Verbs specification. This function assumes that
739  * the input is valid and that there is space to insert the requested action
740  * into the flow.
741  *
742  * @param[in, out] action_flags
743  *   Pointer to the detected actions.
744  * @param[in] dev_flow
745  *   Pointer to mlx5_flow.
746  */
747 static void
748 flow_verbs_translate_action_drop(uint64_t *action_flags,
749                                  struct mlx5_flow *dev_flow)
750 {
751         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
752         struct ibv_flow_spec_action_drop drop = {
753                         .type = IBV_FLOW_SPEC_ACTION_DROP,
754                         .size = size,
755         };
756
757         flow_verbs_spec_add(dev_flow, &drop, size);
758         *action_flags |= MLX5_FLOW_ACTION_DROP;
759 }
760
761 /**
762  * Convert the @p action into a Verbs specification. This function assumes that
763  * the input is valid and that there is space to insert the requested action
764  * into the flow.
765  *
766  * @param[in] action
767  *   Action configuration.
768  * @param[in, out] action_flags
769  *   Pointer to the detected actions.
770  * @param[in] dev_flow
771  *   Pointer to mlx5_flow.
772  */
773 static void
774 flow_verbs_translate_action_queue(const struct rte_flow_action *action,
775                                   uint64_t *action_flags,
776                                   struct mlx5_flow *dev_flow)
777 {
778         const struct rte_flow_action_queue *queue = action->conf;
779         struct rte_flow *flow = dev_flow->flow;
780
781         if (flow->queue)
782                 (*flow->queue)[0] = queue->index;
783         flow->rss.queue_num = 1;
784         *action_flags |= MLX5_FLOW_ACTION_QUEUE;
785 }
786
787 /**
788  * Convert the @p action into a Verbs specification. This function assumes that
789  * the input is valid and that there is space to insert the requested action
790  * into the flow.
791  *
792  * @param[in] action
793  *   Action configuration.
794  * @param[in, out] action_flags
795  *   Pointer to the detected actions.
796  * @param[in] dev_flow
797  *   Pointer to mlx5_flow.
798  */
799 static void
800 flow_verbs_translate_action_rss(const struct rte_flow_action *action,
801                                 uint64_t *action_flags,
802                                 struct mlx5_flow *dev_flow)
803 {
804         const struct rte_flow_action_rss *rss = action->conf;
805         struct rte_flow *flow = dev_flow->flow;
806
807         if (flow->queue)
808                 memcpy((*flow->queue), rss->queue,
809                        rss->queue_num * sizeof(uint16_t));
810         flow->rss.queue_num = rss->queue_num;
811         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
812         flow->rss.types = rss->types;
813         flow->rss.level = rss->level;
814         *action_flags |= MLX5_FLOW_ACTION_RSS;
815 }
816
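/*
 * Usage sketch (editorial, not compiled as part of the driver): an RSS action
 * spreading traffic over two queues could be passed as
 *
 *   uint16_t queues[] = { 0, 1 };
 *   struct rte_flow_action_rss rss_conf = {
 *           .types = ETH_RSS_IP,
 *           .key_len = MLX5_RSS_HASH_KEY_LEN,
 *           .key = rss_key,            (points to a 40-byte key)
 *           .queue_num = 2,
 *           .queue = queues,
 *   };
 *
 * flow_verbs_translate_action_rss() only records this data in the rte_flow;
 * the Verbs hash fields themselves are accumulated by the L3/L4 item
 * translations above.
 */
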
817 /**
818  * Convert the @p action into a Verbs specification. This function assumes that
819  * the input is valid and that there is space to insert the requested action
820  * into the flow.
821  *
822  * @param[in] action
823  *   Action configuration.
824  * @param[in, out] action_flags
825  *   Pointer to the detected actions.
826  * @param[in] dev_flow
827  *   Pointer to mlx5_flow.
828  */
829 static void
830 flow_verbs_translate_action_flag
831                         (const struct rte_flow_action *action __rte_unused,
832                          uint64_t *action_flags,
833                          struct mlx5_flow *dev_flow)
834 {
835         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
836         struct ibv_flow_spec_action_tag tag = {
837                 .type = IBV_FLOW_SPEC_ACTION_TAG,
838                 .size = size,
839                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
840         };
841         *action_flags |= MLX5_FLOW_ACTION_MARK;
842         flow_verbs_spec_add(dev_flow, &tag, size);
843 }
844
845 /**
846  * Update the Verbs specification to turn a flag action into a mark action.
847  *
848  * @param[in, out] verbs
849  *   Pointer to the mlx5_flow_verbs structure.
850  * @param[in] mark_id
851  *   Mark identifier to replace the flag.
852  */
853 static void
854 flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
855 {
856         struct ibv_spec_header *hdr;
857         int i;
858
859         if (!verbs)
860                 return;
861         /* Update Verbs specification. */
862         hdr = (struct ibv_spec_header *)verbs->specs;
863         if (!hdr)
864                 return;
865         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
866                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
867                         struct ibv_flow_spec_action_tag *t =
868                                 (struct ibv_flow_spec_action_tag *)hdr;
869
870                         t->tag_id = mlx5_flow_mark_set(mark_id);
871                 }
872                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
873         }
874 }
875
876 /**
877  * Convert the @p action into a Verbs specification. This function assumes that
878  * the input is valid and that there is space to insert the requested action
879  * into the flow.
880  *
881  * @param[in] action
882  *   Action configuration.
883  * @param[in, out] action_flags
884  *   Pointer to the detected actions.
885  * @param[in] dev_flow
886  *   Pointer to mlx5_flow.
887  */
888 static void
889 flow_verbs_translate_action_mark(const struct rte_flow_action *action,
890                                  uint64_t *action_flags,
891                                  struct mlx5_flow *dev_flow)
892 {
893         const struct rte_flow_action_mark *mark = action->conf;
894         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
895         struct ibv_flow_spec_action_tag tag = {
896                 .type = IBV_FLOW_SPEC_ACTION_TAG,
897                 .size = size,
898         };
899         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
900
901         if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
902                 flow_verbs_mark_update(verbs, mark->id);
903                 size = 0;
904         } else {
905                 tag.tag_id = mlx5_flow_mark_set(mark->id);
906                 flow_verbs_spec_add(dev_flow, &tag, size);
907         }
908         *action_flags |= MLX5_FLOW_ACTION_MARK;
909 }
910
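/*
 * Editorial note: FLAG and MARK share the same Verbs action
 * (IBV_FLOW_SPEC_ACTION_TAG).  When a FLAG action was already translated,
 * the MARK handler above rewrites the existing tag in place through
 * flow_verbs_mark_update() and appends no new spec (size is forced to 0).
 */
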
911 /**
912  * Convert the @p action into a Verbs specification. This function assumes that
913  * the input is valid and that there is space to insert the requested action
914  * into the flow.
915  *
916  * @param[in] dev
917  *   Pointer to the Ethernet device structure.
918  * @param[in] action
919  *   Action configuration.
920  * @param[in, out] action_flags
921  *   Pointer to the detected actions.
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[out] error
925  *   Pointer to error structure.
926  *
927  * @return
928  *   0 on success, a negative errno value otherwise and rte_errno is set.
929  */
930 static int
931 flow_verbs_translate_action_count(struct rte_eth_dev *dev,
932                                   const struct rte_flow_action *action,
933                                   uint64_t *action_flags,
934                                   struct mlx5_flow *dev_flow,
935                                   struct rte_flow_error *error)
936 {
937         const struct rte_flow_action_count *count = action->conf;
938         struct rte_flow *flow = dev_flow->flow;
939 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
940         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
941         struct ibv_flow_spec_counter_action counter = {
942                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
943                 .size = size,
944         };
945 #endif
946
947         if (!flow->counter) {
948                 flow->counter = flow_verbs_counter_new(dev, count->shared,
949                                                        count->id);
950                 if (!flow->counter)
951                         return rte_flow_error_set(error, rte_errno,
952                                                   RTE_FLOW_ERROR_TYPE_ACTION,
953                                                   action,
954                                                   "cannot get counter"
955                                                   " context.");
956         }
957         *action_flags |= MLX5_FLOW_ACTION_COUNT;
958 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
959         counter.counter_set_handle = flow->counter->cs->handle;
960         flow_verbs_spec_add(dev_flow, &counter, size);
961 #endif
962         return 0;
963 }
964
965 /**
966  * Internal validation function for validating both actions and items.
967  *
968  * @param[in] dev
969  *   Pointer to the Ethernet device structure.
970  * @param[in] attr
971  *   Pointer to the flow attributes.
972  * @param[in] items
973  *   Pointer to the list of items.
974  * @param[in] actions
975  *   Pointer to the list of actions.
976  * @param[out] error
977  *   Pointer to the error structure.
978  *
979  * @return
980  *   0 on success, a negative errno value otherwise and rte_errno is set.
981  */
982 static int
983 flow_verbs_validate(struct rte_eth_dev *dev,
984                     const struct rte_flow_attr *attr,
985                     const struct rte_flow_item items[],
986                     const struct rte_flow_action actions[],
987                     struct rte_flow_error *error)
988 {
989         int ret;
990         uint32_t action_flags = 0;
991         uint32_t item_flags = 0;
992         int tunnel = 0;
993         uint8_t next_protocol = 0xff;
994
995         if (items == NULL)
996                 return -1;
997         ret = mlx5_flow_validate_attributes(dev, attr, error);
998         if (ret < 0)
999                 return ret;
1000         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1001                 int ret = 0;
1002                 switch (items->type) {
1003                 case RTE_FLOW_ITEM_TYPE_VOID:
1004                         break;
1005                 case RTE_FLOW_ITEM_TYPE_ETH:
1006                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1007                                                           error);
1008                         if (ret < 0)
1009                                 return ret;
1010                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1011                                                MLX5_FLOW_LAYER_OUTER_L2;
1012                         break;
1013                 case RTE_FLOW_ITEM_TYPE_VLAN:
1014                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1015                                                            error);
1016                         if (ret < 0)
1017                                 return ret;
1018                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1019                                                MLX5_FLOW_LAYER_OUTER_VLAN;
1020                         break;
1021                 case RTE_FLOW_ITEM_TYPE_IPV4:
1022                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1023                                                            error);
1024                         if (ret < 0)
1025                                 return ret;
1026                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1027                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1028                         if (items->mask != NULL &&
1029                             ((const struct rte_flow_item_ipv4 *)
1030                              items->mask)->hdr.next_proto_id)
1031                                 next_protocol =
1032                                         ((const struct rte_flow_item_ipv4 *)
1033                                          (items->spec))->hdr.next_proto_id;
1034                         break;
1035                 case RTE_FLOW_ITEM_TYPE_IPV6:
1036                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1037                                                            error);
1038                         if (ret < 0)
1039                                 return ret;
1040                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1041                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1042                         if (items->mask != NULL &&
1043                             ((const struct rte_flow_item_ipv6 *)
1044                              items->mask)->hdr.proto)
1045                                 next_protocol =
1046                                         ((const struct rte_flow_item_ipv6 *)
1047                                          items->spec)->hdr.proto;
1048                         break;
1049                 case RTE_FLOW_ITEM_TYPE_UDP:
1050                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1051                                                           next_protocol,
1052                                                           error);
1053                         if (ret < 0)
1054                                 return ret;
1055                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1056                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1057                         break;
1058                 case RTE_FLOW_ITEM_TYPE_TCP:
1059                         ret = mlx5_flow_validate_item_tcp(items, item_flags,
1060                                                           next_protocol, error);
1061                         if (ret < 0)
1062                                 return ret;
1063                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1064                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1065                         break;
1066                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1067                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1068                                                             error);
1069                         if (ret < 0)
1070                                 return ret;
1071                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1072                         break;
1073                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1074                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1075                                                                 item_flags,
1076                                                                 dev, error);
1077                         if (ret < 0)
1078                                 return ret;
1079                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1080                         break;
1081                 case RTE_FLOW_ITEM_TYPE_GRE:
1082                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1083                                                           next_protocol, error);
1084                         if (ret < 0)
1085                                 return ret;
1086                         item_flags |= MLX5_FLOW_LAYER_GRE;
1087                         break;
1088                 case RTE_FLOW_ITEM_TYPE_MPLS:
1089                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
1090                                                            next_protocol,
1091                                                            error);
1092                         if (ret < 0)
1093                                 return ret;
1094                         if (next_protocol != 0xff &&
1095                             next_protocol != IPPROTO_MPLS)
1096                                 return rte_flow_error_set
1097                                         (error, EINVAL,
1098                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1099                                          "protocol filtering not compatible"
1100                                          " with MPLS layer");
1101                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1102                         break;
1103                 default:
1104                         return rte_flow_error_set(error, ENOTSUP,
1105                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1106                                                   NULL, "item not supported");
1107                 }
1108         }
1109         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1110                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1111                 switch (actions->type) {
1112                 case RTE_FLOW_ACTION_TYPE_VOID:
1113                         break;
1114                 case RTE_FLOW_ACTION_TYPE_FLAG:
1115                         ret = mlx5_flow_validate_action_flag(action_flags,
1116                                                              error);
1117                         if (ret < 0)
1118                                 return ret;
1119                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1120                         break;
1121                 case RTE_FLOW_ACTION_TYPE_MARK:
1122                         ret = mlx5_flow_validate_action_mark(actions,
1123                                                              action_flags,
1124                                                              error);
1125                         if (ret < 0)
1126                                 return ret;
1127                         action_flags |= MLX5_FLOW_ACTION_MARK;
1128                         break;
1129                 case RTE_FLOW_ACTION_TYPE_DROP:
1130                         ret = mlx5_flow_validate_action_drop(action_flags,
1131                                                              error);
1132                         if (ret < 0)
1133                                 return ret;
1134                         action_flags |= MLX5_FLOW_ACTION_DROP;
1135                         break;
1136                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1137                         ret = mlx5_flow_validate_action_queue(actions,
1138                                                               action_flags, dev,
1139                                                               error);
1140                         if (ret < 0)
1141                                 return ret;
1142                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1143                         break;
1144                 case RTE_FLOW_ACTION_TYPE_RSS:
1145                         ret = mlx5_flow_validate_action_rss(actions,
1146                                                             action_flags, dev,
1147                                                             error);
1148                         if (ret < 0)
1149                                 return ret;
1150                         action_flags |= MLX5_FLOW_ACTION_RSS;
1151                         break;
1152                 case RTE_FLOW_ACTION_TYPE_COUNT:
1153                         ret = mlx5_flow_validate_action_count(dev, error);
1154                         if (ret < 0)
1155                                 return ret;
1156                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1157                         break;
1158                 default:
1159                         return rte_flow_error_set(error, ENOTSUP,
1160                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1161                                                   actions,
1162                                                   "action not supported");
1163                 }
1164         }
1165         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1166                 return rte_flow_error_set(error, EINVAL,
1167                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1168                                           "no fate action is found");
1169         return 0;
1170 }
1171
1172 /**
1173  * Calculate the required bytes that are needed for the action part of the Verbs
1174  * flow. In addition, it returns a bit-field with all the detected actions, in
1175  * order to avoid another iteration over the actions.
1176  *
1177  * @param[in] actions
1178  *   Pointer to the list of actions.
1179  * @param[out] action_flags
1180  *   Pointer to the detected actions.
1181  *
1182  * @return
1183  *   The size of the memory needed for all actions.
1184  */
1185 static int
1186 flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
1187                                 uint64_t *action_flags)
1188 {
1189         int size = 0;
1190         uint64_t detected_actions = 0;
1191
1192         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1193                 switch (actions->type) {
1194                 case RTE_FLOW_ACTION_TYPE_VOID:
1195                         break;
1196                 case RTE_FLOW_ACTION_TYPE_FLAG:
1197                         size += sizeof(struct ibv_flow_spec_action_tag);
1198                         detected_actions |= MLX5_FLOW_ACTION_FLAG;
1199                         break;
1200                 case RTE_FLOW_ACTION_TYPE_MARK:
1201                         size += sizeof(struct ibv_flow_spec_action_tag);
1202                         detected_actions |= MLX5_FLOW_ACTION_MARK;
1203                         break;
1204                 case RTE_FLOW_ACTION_TYPE_DROP:
1205                         size += sizeof(struct ibv_flow_spec_action_drop);
1206                         detected_actions |= MLX5_FLOW_ACTION_DROP;
1207                         break;
1208                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1209                         detected_actions |= MLX5_FLOW_ACTION_QUEUE;
1210                         break;
1211                 case RTE_FLOW_ACTION_TYPE_RSS:
1212                         detected_actions |= MLX5_FLOW_ACTION_RSS;
1213                         break;
1214                 case RTE_FLOW_ACTION_TYPE_COUNT:
1215 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1216                         size += sizeof(struct ibv_flow_spec_counter_action);
1217 #endif
1218                         detected_actions |= MLX5_FLOW_ACTION_COUNT;
1219                         break;
1220                 default:
1221                         break;
1222                 }
1223         }
1224         *action_flags = detected_actions;
1225         return size;
1226 }
1227
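/*
 * Editorial note: QUEUE and RSS contribute no bytes above because fate
 * actions are realized through the Rx hash queue object the flow is attached
 * to, not through a Verbs specification entry.
 */
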
1228 /**
1229  * Calculate the required bytes that are needed for the item part of the Verbs
1230  * flow. In addition, it returns a bit-field with all the detected items, in
1231  * order to avoid another iteration over the items.
1232  *
1233  * @param[in] items
1234  *   Pointer to the list of items.
1235  * @param[in, out] item_flags
1236  *   Pointer to the detected items.
1237  *
1238  * @return
1239  *   The size of the memory needed for all items.
1240  */
1241 static int
1242 flow_verbs_get_items_and_size(const struct rte_flow_item items[],
1243                               uint64_t *item_flags)
1244 {
1245         int size = 0;
1246         uint64_t detected_items = 0;
1247         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
1248
1249         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1250                 switch (items->type) {
1251                 case RTE_FLOW_ITEM_TYPE_VOID:
1252                         break;
1253                 case RTE_FLOW_ITEM_TYPE_ETH:
1254                         size += sizeof(struct ibv_flow_spec_eth);
1255                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1256                                                    MLX5_FLOW_LAYER_OUTER_L2;
1257                         break;
1258                 case RTE_FLOW_ITEM_TYPE_VLAN:
1259                         size += sizeof(struct ibv_flow_spec_eth);
1260                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1261                                                    MLX5_FLOW_LAYER_OUTER_VLAN;
1262                         break;
1263                 case RTE_FLOW_ITEM_TYPE_IPV4:
1264                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1265                         detected_items |= tunnel ?
1266                                           MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1267                                           MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1268                         break;
1269                 case RTE_FLOW_ITEM_TYPE_IPV6:
1270                         size += sizeof(struct ibv_flow_spec_ipv6);
1271                         detected_items |= tunnel ?
1272                                           MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1273                                           MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1274                         break;
1275                 case RTE_FLOW_ITEM_TYPE_UDP:
1276                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1277                         detected_items |= tunnel ?
1278                                           MLX5_FLOW_LAYER_INNER_L4_UDP :
1279                                           MLX5_FLOW_LAYER_OUTER_L4_UDP;
1280                         break;
1281                 case RTE_FLOW_ITEM_TYPE_TCP:
1282                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1283                         detected_items |= tunnel ?
1284                                           MLX5_FLOW_LAYER_INNER_L4_TCP :
1285                                           MLX5_FLOW_LAYER_OUTER_L4_TCP;
1286                         break;
1287                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1288                         size += sizeof(struct ibv_flow_spec_tunnel);
1289                         detected_items |= MLX5_FLOW_LAYER_VXLAN;
1290                         break;
1291                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1292                         size += sizeof(struct ibv_flow_spec_tunnel);
1293                         detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
1294                         break;
1295 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1296                 case RTE_FLOW_ITEM_TYPE_GRE:
1297                         size += sizeof(struct ibv_flow_spec_gre);
1298                         detected_items |= MLX5_FLOW_LAYER_GRE;
1299                         break;
1300                 case RTE_FLOW_ITEM_TYPE_MPLS:
1301                         size += sizeof(struct ibv_flow_spec_mpls);
1302                         detected_items |= MLX5_FLOW_LAYER_MPLS;
1303                         break;
1304 #else
1305                 case RTE_FLOW_ITEM_TYPE_GRE:
1306                         size += sizeof(struct ibv_flow_spec_tunnel);
1307                         detected_items |= MLX5_FLOW_LAYER_TUNNEL;
1308                         break;
1309 #endif
1310                 default:
1311                         break;
1312                 }
1313         }
1314         *item_flags = detected_items;
1315         return size;
1316 }
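
/*
 * Example: for an outer ETH / IPV4 / UDP pattern the loop above reserves
 * sizeof(struct ibv_flow_spec_eth) + sizeof(struct ibv_flow_spec_ipv4_ext) +
 * sizeof(struct ibv_flow_spec_tcp_udp) bytes and reports
 * MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 * MLX5_FLOW_LAYER_OUTER_L4_UDP in *item_flags (assuming no tunnel layer is
 * set in *item_flags on entry).
 */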
1317
1318 /**
1319  * Internal preparation function. Allocate mlx5_flow with the required size.
1320  * The required size is calculated based on the actions and items. This function
1321  * also returns the detected actions and items for later use.
1322  *
1323  * @param[in] attr
1324  *   Pointer to the flow attributes.
1325  * @param[in] items
1326  *   Pointer to the list of items.
1327  * @param[in] actions
1328  *   Pointer to the list of actions.
1329  * @param[out] item_flags
1330  *   Pointer to bit mask of all items detected.
1331  * @param[out] action_flags
1332  *   Pointer to bit mask of all actions detected.
1333  * @param[out] error
1334  *   Pointer to the error structure.
1335  *
1336  * @return
1337  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1338  *   is set.
1339  */
1340 static struct mlx5_flow *
1341 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1342                    const struct rte_flow_item items[],
1343                    const struct rte_flow_action actions[],
1344                    uint64_t *item_flags,
1345                    uint64_t *action_flags,
1346                    struct rte_flow_error *error)
1347 {
1348         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1349         struct mlx5_flow *flow;
1350
1351         size += flow_verbs_get_actions_and_size(actions, action_flags);
1352         size += flow_verbs_get_items_and_size(items, item_flags);
1353         flow = rte_calloc(__func__, 1, size, 0);
1354         if (!flow) {
1355                 rte_flow_error_set(error, ENOMEM,
1356                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1357                                    "not enough memory to create flow");
1358                 return NULL;
1359         }
1360         flow->verbs.attr = (void *)(flow + 1);
1361         flow->verbs.specs =
1362                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1363         return flow;
1364 }
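
/*
 * Layout of the single allocation returned above:
 *
 *   +------------------+----------------------+---------------------------+
 *   | struct mlx5_flow | struct ibv_flow_attr | specs (items and actions) |
 *   +------------------+----------------------+---------------------------+
 *                        ^ verbs.attr           ^ verbs.specs
 *
 * The specs area is sized by the two helpers above and is filled in later
 * by flow_verbs_translate().
 */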
1365
1366 /**
1367  * Fill the flow with verbs specifications.
1368  *
1369  * @param[in] dev
1370  *   Pointer to Ethernet device.
1371  * @param[in, out] dev_flow
1372  *   Pointer to the mlx5 flow.
1373  * @param[in] attr
1374  *   Pointer to the flow attributes.
1375  * @param[in] items
1376  *   Pointer to the list of items.
1377  * @param[in] actions
1378  *   Pointer to the list of actions.
1379  * @param[out] error
1380  *   Pointer to the error structure.
1381  *
1382  * @return
1383  *   0 on success, a negative errno value otherwise and rte_errno is set.
1384  */
1385 static int
1386 flow_verbs_translate(struct rte_eth_dev *dev,
1387                      struct mlx5_flow *dev_flow,
1388                      const struct rte_flow_attr *attr,
1389                      const struct rte_flow_item items[],
1390                      const struct rte_flow_action actions[],
1391                      struct rte_flow_error *error)
1392 {
1393         uint64_t action_flags = 0;
1394         uint64_t item_flags = 0;
1395         uint64_t priority = attr->priority;
1396         struct priv *priv = dev->data->dev_private;
1397
1398         if (priority == MLX5_FLOW_PRIO_RSVD)
1399                 priority = priv->config.flow_prio - 1;
1400         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1401                 int ret;
1402                 switch (actions->type) {
1403                 case RTE_FLOW_ACTION_TYPE_VOID:
1404                         break;
1405                 case RTE_FLOW_ACTION_TYPE_FLAG:
1406                         flow_verbs_translate_action_flag(actions,
1407                                                          &action_flags,
1408                                                          dev_flow);
1409                         break;
1410                 case RTE_FLOW_ACTION_TYPE_MARK:
1411                         flow_verbs_translate_action_mark(actions,
1412                                                          &action_flags,
1413                                                          dev_flow);
1414                         break;
1415                 case RTE_FLOW_ACTION_TYPE_DROP:
1416                         flow_verbs_translate_action_drop(&action_flags,
1417                                                          dev_flow);
1418                         break;
1419                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1420                         flow_verbs_translate_action_queue(actions,
1421                                                           &action_flags,
1422                                                           dev_flow);
1423                         break;
1424                 case RTE_FLOW_ACTION_TYPE_RSS:
1425                         flow_verbs_translate_action_rss(actions,
1426                                                         &action_flags,
1427                                                         dev_flow);
1428                         break;
1429                 case RTE_FLOW_ACTION_TYPE_COUNT:
1430                         ret = flow_verbs_translate_action_count(dev,
1431                                                                 actions,
1432                                                                 &action_flags,
1433                                                                 dev_flow,
1434                                                                 error);
1435                         if (ret < 0)
1436                                 return ret;
1437                         break;
1438                 default:
1439                         return rte_flow_error_set(error, ENOTSUP,
1440                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1441                                                   actions,
1442                                                   "action not supported");
1443                 }
1444         }
1445         dev_flow->flow->actions |= action_flags;
1446         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1447                 switch (items->type) {
1448                 case RTE_FLOW_ITEM_TYPE_VOID:
1449                         break;
1450                 case RTE_FLOW_ITEM_TYPE_ETH:
1451                         flow_verbs_translate_item_eth(items, &item_flags,
1452                                                       dev_flow);
1453                         break;
1454                 case RTE_FLOW_ITEM_TYPE_VLAN:
1455                         flow_verbs_translate_item_vlan(items, &item_flags,
1456                                                        dev_flow);
1457                         break;
1458                 case RTE_FLOW_ITEM_TYPE_IPV4:
1459                         flow_verbs_translate_item_ipv4(items, &item_flags,
1460                                                        dev_flow);
1461                         break;
1462                 case RTE_FLOW_ITEM_TYPE_IPV6:
1463                         flow_verbs_translate_item_ipv6(items, &item_flags,
1464                                                        dev_flow);
1465                         break;
1466                 case RTE_FLOW_ITEM_TYPE_UDP:
1467                         flow_verbs_translate_item_udp(items, &item_flags,
1468                                                       dev_flow);
1469                         break;
1470                 case RTE_FLOW_ITEM_TYPE_TCP:
1471                         flow_verbs_translate_item_tcp(items, &item_flags,
1472                                                       dev_flow);
1473                         break;
1474                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1475                         flow_verbs_translate_item_vxlan(items, &item_flags,
1476                                                         dev_flow);
1477                         break;
1478                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1479                         flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
1480                                                             dev_flow);
1481                         break;
1482                 case RTE_FLOW_ITEM_TYPE_GRE:
1483                         flow_verbs_translate_item_gre(items, &item_flags,
1484                                                       dev_flow);
1485                         break;
1486                 case RTE_FLOW_ITEM_TYPE_MPLS:
1487                         flow_verbs_translate_item_mpls(items, &item_flags,
1488                                                        dev_flow);
1489                         break;
1490                 default:
1491                         return rte_flow_error_set(error, ENOTSUP,
1492                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1493                                                   NULL,
1494                                                   "item not supported");
1495                 }
1496         }
1497         dev_flow->verbs.attr->priority =
1498                 mlx5_flow_adjust_priority(dev, priority,
1499                                           dev_flow->verbs.attr->priority);
1500         return 0;
1501 }
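
/*
 * Note: the actions are translated before the items, so the accumulated
 * action flags are already stored in dev_flow->flow->actions when the item
 * loop runs; the final attribute priority combines the flow priority with
 * the sub-priority left in verbs.attr->priority through
 * mlx5_flow_adjust_priority().
 */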
1502
1503 /**
1504  * Remove the flow from the NIC but keep it in memory.
1505  *
1506  * @param[in] dev
1507  *   Pointer to the Ethernet device structure.
1508  * @param[in, out] flow
1509  *   Pointer to flow structure.
1510  */
1511 static void
1512 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1513 {
1514         struct mlx5_flow_verbs *verbs;
1515         struct mlx5_flow *dev_flow;
1516
1517         if (!flow)
1518                 return;
1519         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1520                 verbs = &dev_flow->verbs;
1521                 if (verbs->flow) {
1522                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1523                         verbs->flow = NULL;
1524                 }
1525                 if (verbs->hrxq) {
1526                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1527                                 mlx5_hrxq_drop_release(dev);
1528                         else
1529                                 mlx5_hrxq_release(dev, verbs->hrxq);
1530                         verbs->hrxq = NULL;
1531                 }
1532         }
1533         if (flow->counter) {
1534                 flow_verbs_counter_release(flow->counter);
1535                 flow->counter = NULL;
1536         }
1537 }
1538
1539 /**
1540  * Remove the flow from the NIC and the memory.
1541  *
1542  * @param[in] dev
1543  *   Pointer to the Ethernet device structure.
1544  * @param[in, out] flow
1545  *   Pointer to flow structure.
1546  */
1547 static void
1548 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1549 {
1550         struct mlx5_flow *dev_flow;
1551
1552         if (!flow)
1553                 return;
1554         flow_verbs_remove(dev, flow);
1555         while (!LIST_EMPTY(&flow->dev_flows)) {
1556                 dev_flow = LIST_FIRST(&flow->dev_flows);
1557                 LIST_REMOVE(dev_flow, next);
1558                 rte_free(dev_flow);
1559         }
1560 }
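
/*
 * flow_verbs_remove() above only releases the device resources (verbs flows,
 * hash Rx queues and the counter) and keeps the allocated mlx5_flow objects;
 * flow_verbs_destroy() first removes the flow and then frees those objects.
 */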
1561
1562 /**
1563  * Apply the flow to the NIC.
1564  *
1565  * @param[in] dev
1566  *   Pointer to the Ethernet device structure.
1567  * @param[in, out] flow
1568  *   Pointer to flow structure.
1569  * @param[out] error
1570  *   Pointer to error structure.
1571  *
1572  * @return
1573  *   0 on success, a negative errno value otherwise and rte_errno is set.
1574  */
1575 static int
1576 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1577                  struct rte_flow_error *error)
1578 {
1579         struct mlx5_flow_verbs *verbs;
1580         struct mlx5_flow *dev_flow;
1581         int err;
1582
1583         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1584                 verbs = &dev_flow->verbs;
1585                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1586                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1587                         if (!verbs->hrxq) {
1588                                 rte_flow_error_set
1589                                         (error, errno,
1590                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1591                                          "cannot get drop hash queue");
1592                                 goto error;
1593                         }
1594                 } else {
1595                         struct mlx5_hrxq *hrxq;
1596
1597                         hrxq = mlx5_hrxq_get(dev, flow->key,
1598                                              MLX5_RSS_HASH_KEY_LEN,
1599                                              verbs->hash_fields,
1600                                              (*flow->queue),
1601                                              flow->rss.queue_num);
1602                         if (!hrxq)
1603                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1604                                                      MLX5_RSS_HASH_KEY_LEN,
1605                                                      verbs->hash_fields,
1606                                                      (*flow->queue),
1607                                                      flow->rss.queue_num,
1608                                                      !!(flow->layers &
1609                                                       MLX5_FLOW_LAYER_TUNNEL));
1610                         if (!hrxq) {
1611                                 rte_flow_error_set
1612                                         (error, rte_errno,
1613                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1614                                          "cannot get hash queue");
1615                                 goto error;
1616                         }
1617                         verbs->hrxq = hrxq;
1618                 }
1619                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1620                                                      verbs->attr);
1621                 if (!verbs->flow) {
1622                         rte_flow_error_set(error, errno,
1623                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1624                                            NULL,
1625                                            "hardware refuses to create flow");
1626                         goto error;
1627                 }
1628         }
1629         return 0;
1630 error:
1631         err = rte_errno; /* Save rte_errno before cleanup. */
1632         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1633                 verbs = &dev_flow->verbs;
1634                 if (verbs->hrxq) {
1635                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1636                                 mlx5_hrxq_drop_release(dev);
1637                         else
1638                                 mlx5_hrxq_release(dev, verbs->hrxq);
1639                         verbs->hrxq = NULL;
1640                 }
1641         }
1642         rte_errno = err; /* Restore rte_errno. */
1643         return -rte_errno;
1644 }
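
/*
 * Note: the error path above only drops the hash Rx queue references taken
 * so far; verbs flows already created for earlier sub-flows are left in
 * place and are expected to be released by a later flow_verbs_remove() or
 * flow_verbs_destroy() call from the caller.
 */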
1645
1646 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1647         .validate = flow_verbs_validate,
1648         .prepare = flow_verbs_prepare,
1649         .translate = flow_verbs_translate,
1650         .apply = flow_verbs_apply,
1651         .remove = flow_verbs_remove,
1652         .destroy = flow_verbs_destroy,
1653 };
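
/*
 * Illustrative use of this ops table by a caller (a sketch only; the local
 * variables dev, attr, items, actions, flow and error are assumed to come
 * from the caller's context and the linking of dev_flow into the flow is
 * shown in a simplified form):
 *
 *	const struct mlx5_flow_driver_ops *ops = &mlx5_flow_verbs_drv_ops;
 *	uint64_t item_flags = 0;
 *	uint64_t action_flags = 0;
 *	struct mlx5_flow *dev_flow;
 *
 *	dev_flow = ops->prepare(attr, items, actions, &item_flags,
 *				&action_flags, error);
 *	if (!dev_flow)
 *		return -rte_errno;
 *	dev_flow->flow = flow;
 *	LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
 *	if (ops->translate(dev, dev_flow, attr, items, actions, error) ||
 *	    ops->apply(dev, flow, error)) {
 *		ops->destroy(dev, flow);
 *		return -rte_errno;
 *	}
 */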