net/mlx5: support e-switch TCP-flags flow filter
drivers/net/mlx5/mlx5_flow_verbs.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
36 /**
37  * Get a flow counter.
38  *
39  * @param[in] dev
40  *   Pointer to the Ethernet device structure.
41  * @param[in] shared
42  *   Indicate if this counter is shared with other flows.
43  * @param[in] id
44  *   Counter identifier.
45  *
46  * @return
47  *   A pointer to the counter, NULL otherwise and rte_errno is set.
48  */
49 static struct mlx5_flow_counter *
50 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
51 {
52         struct priv *priv = dev->data->dev_private;
53         struct mlx5_flow_counter *cnt;
54
55         LIST_FOREACH(cnt, &priv->flow_counters, next) {
56                 if (!cnt->shared || cnt->shared != shared)
57                         continue;
58                 if (cnt->id != id)
59                         continue;
60                 cnt->ref_cnt++;
61                 return cnt;
62         }
63 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
64
65         struct mlx5_flow_counter tmpl = {
66                 .shared = shared,
67                 .id = id,
68                 .cs = mlx5_glue->create_counter_set
69                         (priv->ctx,
70                          &(struct ibv_counter_set_init_attr){
71                                  .counter_set_id = id,
72                          }),
73                 .hits = 0,
74                 .bytes = 0,
75         };
76
77         if (!tmpl.cs) {
78                 rte_errno = errno;
79                 return NULL;
80         }
81         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
82         if (!cnt) {
83                 rte_errno = ENOMEM;
84                 return NULL;
85         }
86         *cnt = tmpl;
87         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
88         return cnt;
89 #endif
90         rte_errno = ENOTSUP;
91         return NULL;
92 }
93
94 /**
95  * Release a flow counter.
96  *
97  * @param[in] counter
98  *   Pointer to the counter handle.
99  */
100 static void
101 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
102 {
103         if (--counter->ref_cnt == 0) {
104                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
105                 LIST_REMOVE(counter, next);
106                 rte_free(counter);
107         }
108 }
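
/*
 * Illustrative sketch only, not part of the driver: the counter life cycle
 * around the two helpers above.  The function name and the shared/id values
 * are hypothetical.
 */
static int __rte_unused
flow_verbs_counter_usage_sketch(struct rte_eth_dev *dev)
{
        struct mlx5_flow_counter *cnt;

        /* Look up a shared counter by id, or create it; takes a reference. */
        cnt = flow_verbs_counter_new(dev, 1 /* shared */, 42 /* id */);
        if (!cnt)
                return -rte_errno;
        /* The counter would normally be attached to a flow rule here. */
        /* Drop the reference; the counter set goes away on the last release. */
        flow_verbs_counter_release(cnt);
        return 0;
}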
109
110 /**
111  * Add a verbs item specification into @p flow.
112  *
113  * @param[in, out] flow
114  *   Pointer to flow structure.
115  * @param[in] src
116  *   Create specification.
117  * @param[in] size
118  *   Size in bytes of the specification to copy.
119  */
120 static void
121 flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
122 {
123         struct mlx5_flow_verbs *verbs = &flow->verbs;
124
125         if (verbs->specs) {
126                 void *dst;
127
128                 dst = (void *)(verbs->specs + verbs->size);
129                 memcpy(dst, src, size);
130                 ++verbs->attr->num_of_specs;
131         }
132         verbs->size += size;
133 }
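
/*
 * Illustrative sketch only, not part of the driver: how the translation
 * helpers below are expected to use flow_verbs_spec_add().  During the
 * sizing pass verbs->specs is still NULL, so only verbs->size is
 * accumulated; once the ibv_flow_attr buffer has been allocated the same
 * call copies the specification and increments num_of_specs.  The function
 * name is hypothetical.
 */
static void __rte_unused
flow_verbs_spec_add_usage_sketch(struct mlx5_flow *dev_flow)
{
        struct ibv_flow_spec_eth eth = {
                .type = IBV_FLOW_SPEC_ETH,
                .size = sizeof(struct ibv_flow_spec_eth),
        };

        /*
         * Copies the spec when dev_flow->verbs.specs is set, otherwise only
         * accounts for its size.
         */
        flow_verbs_spec_add(dev_flow, &eth, sizeof(eth));
}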
134
135 /**
136  * Convert the @p item into a Verbs specification. This function assumes that
137  * the input is valid and that there is space to insert the requested item
138  * into the flow.
139  *
140  * @param[in] item
141  *   Item specification.
142  * @param[in, out] item_flags
143  *   Bit field with all detected items.
144  * @param[in, out] dev_flow
145  *   Pointer to dev_flow structure.
146  */
147 static void
148 flow_verbs_translate_item_eth(const struct rte_flow_item *item,
149                               uint64_t *item_flags,
150                               struct mlx5_flow *dev_flow)
151 {
152         const struct rte_flow_item_eth *spec = item->spec;
153         const struct rte_flow_item_eth *mask = item->mask;
154         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
155         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
156         struct ibv_flow_spec_eth eth = {
157                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
158                 .size = size,
159         };
160
161         if (!mask)
162                 mask = &rte_flow_item_eth_mask;
163         if (spec) {
164                 unsigned int i;
165
166                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
167                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
168                 eth.val.ether_type = spec->type;
169                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
170                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
171                 eth.mask.ether_type = mask->type;
172                 /* Remove unwanted bits from values. */
173                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
174                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
175                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
176                 }
177                 eth.val.ether_type &= eth.mask.ether_type;
178                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
179         }
180         flow_verbs_spec_add(dev_flow, &eth, size);
181         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
182                                 MLX5_FLOW_LAYER_OUTER_L2;
183 }
184
185 /**
186  * Update the VLAN tag in the Verbs Ethernet specification.
187  * This function assumes that the input is valid and there is space to add
188  * the requested item.
189  *
190  * @param[in, out] attr
191  *   Pointer to Verbs attributes structure.
192  * @param[in] eth
193  *   Verbs structure containing the VLAN information to copy.
194  */
195 static void
196 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
197                             struct ibv_flow_spec_eth *eth)
198 {
199         unsigned int i;
200         const enum ibv_flow_spec_type search = eth->type;
201         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
202                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
203
204         for (i = 0; i != attr->num_of_specs; ++i) {
205                 if (hdr->type == search) {
206                         struct ibv_flow_spec_eth *e =
207                                 (struct ibv_flow_spec_eth *)hdr;
208
209                         e->val.vlan_tag = eth->val.vlan_tag;
210                         e->mask.vlan_tag = eth->mask.vlan_tag;
211                         e->val.ether_type = eth->val.ether_type;
212                         e->mask.ether_type = eth->mask.ether_type;
213                         break;
214                 }
215                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
216         }
217 }
218
219 /**
220  * Convert the @p item into a Verbs specification. This function assumes that
221  * the input is valid and that there is space to insert the requested item
222  * into the flow.
223  *
224  * @param[in] item
225  *   Item specification.
226  * @param[in, out] item_flags
227  *   Bit mask that holds all detected items.
228  * @param[in, out] dev_flow
229  *   Pointer to dev_flow structure.
230  */
231 static void
232 flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
233                                uint64_t *item_flags,
234                                struct mlx5_flow *dev_flow)
235 {
236         const struct rte_flow_item_vlan *spec = item->spec;
237         const struct rte_flow_item_vlan *mask = item->mask;
238         unsigned int size = sizeof(struct ibv_flow_spec_eth);
239         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
240         struct ibv_flow_spec_eth eth = {
241                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
242                 .size = size,
243         };
244         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
245                                       MLX5_FLOW_LAYER_OUTER_L2;
246
247         if (!mask)
248                 mask = &rte_flow_item_vlan_mask;
249         if (spec) {
250                 eth.val.vlan_tag = spec->tci;
251                 eth.mask.vlan_tag = mask->tci;
252                 eth.val.vlan_tag &= eth.mask.vlan_tag;
253                 eth.val.ether_type = spec->inner_type;
254                 eth.mask.ether_type = mask->inner_type;
255                 eth.val.ether_type &= eth.mask.ether_type;
256         }
257         if (!(*item_flags & l2m)) {
258                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
259                 flow_verbs_spec_add(dev_flow, &eth, size);
260         } else {
261                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
262                 size = 0; /* Only an update is done in eth specification. */
263         }
264         *item_flags |= tunnel ?
265                        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
266                        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
267 }
268
269 /**
270  * Convert the @p item into a Verbs specification. This function assumes that
271  * the input is valid and that there is space to insert the requested item
272  * into the flow.
273  *
274  * @param[in] item
275  *   Item specification.
276  * @param[in, out] item_flags
277  *   Bit mask that marks all detected items.
278  * @param[in, out] dev_flow
279  *   Pointer to dev_flow structure.
280  */
281 static void
282 flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
283                                uint64_t *item_flags,
284                                struct mlx5_flow *dev_flow)
285 {
286         const struct rte_flow_item_ipv4 *spec = item->spec;
287         const struct rte_flow_item_ipv4 *mask = item->mask;
288         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
289         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
290         struct ibv_flow_spec_ipv4_ext ipv4 = {
291                 .type = IBV_FLOW_SPEC_IPV4_EXT |
292                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
293                 .size = size,
294         };
295
296         if (!mask)
297                 mask = &rte_flow_item_ipv4_mask;
298         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
299                                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
300         if (spec) {
301                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
302                         .src_ip = spec->hdr.src_addr,
303                         .dst_ip = spec->hdr.dst_addr,
304                         .proto = spec->hdr.next_proto_id,
305                         .tos = spec->hdr.type_of_service,
306                 };
307                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
308                         .src_ip = mask->hdr.src_addr,
309                         .dst_ip = mask->hdr.dst_addr,
310                         .proto = mask->hdr.next_proto_id,
311                         .tos = mask->hdr.type_of_service,
312                 };
313                 /* Remove unwanted bits from values. */
314                 ipv4.val.src_ip &= ipv4.mask.src_ip;
315                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
316                 ipv4.val.proto &= ipv4.mask.proto;
317                 ipv4.val.tos &= ipv4.mask.tos;
318         }
319         dev_flow->verbs.hash_fields |=
320                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
321                                             MLX5_IPV4_LAYER_TYPES,
322                                             MLX5_IPV4_IBV_RX_HASH);
323         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
324         flow_verbs_spec_add(dev_flow, &ipv4, size);
325 }
326
327 /**
328  * Convert the @p item into a Verbs specification. This function assumes that
329  * the input is valid and that there is space to insert the requested item
330  * into the flow.
331  *
332  * @param[in] item
333  *   Item specification.
334  * @param[in, out] item_flags
335  *   Bit mask that marks all detected items.
336  * @param[in, out] dev_flow
337  *   Pointer to dev_flow structure.
338  */
339 static void
340 flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
341                                uint64_t *item_flags,
342                                struct mlx5_flow *dev_flow)
343 {
344         const struct rte_flow_item_ipv6 *spec = item->spec;
345         const struct rte_flow_item_ipv6 *mask = item->mask;
346         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
347         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
348         struct ibv_flow_spec_ipv6 ipv6 = {
349                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
350                 .size = size,
351         };
352
353         if (!mask)
354                 mask = &rte_flow_item_ipv6_mask;
355         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
356                                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
357         if (spec) {
358                 unsigned int i;
359                 uint32_t vtc_flow_val;
360                 uint32_t vtc_flow_mask;
361
362                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
363                        RTE_DIM(ipv6.val.src_ip));
364                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
365                        RTE_DIM(ipv6.val.dst_ip));
366                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
367                        RTE_DIM(ipv6.mask.src_ip));
368                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
369                        RTE_DIM(ipv6.mask.dst_ip));
370                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
371                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
372                 ipv6.val.flow_label =
373                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
374                                          IPV6_HDR_FL_SHIFT);
375                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
376                                          IPV6_HDR_TC_SHIFT;
377                 ipv6.val.next_hdr = spec->hdr.proto;
378                 ipv6.val.hop_limit = spec->hdr.hop_limits;
379                 ipv6.mask.flow_label =
380                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
381                                          IPV6_HDR_FL_SHIFT);
382                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
383                                           IPV6_HDR_TC_SHIFT;
384                 ipv6.mask.next_hdr = mask->hdr.proto;
385                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
386                 /* Remove unwanted bits from values. */
387                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
388                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
389                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
390                 }
391                 ipv6.val.flow_label &= ipv6.mask.flow_label;
392                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
393                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
394                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
395         }
396         dev_flow->verbs.hash_fields |=
397                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
398                                             MLX5_IPV6_LAYER_TYPES,
399                                             MLX5_IPV6_IBV_RX_HASH);
400         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
401         flow_verbs_spec_add(dev_flow, &ipv6, size);
402 }
403
404 /**
405  * Convert the @p item into a Verbs specification. This function assumes that
406  * the input is valid and that there is space to insert the requested item
407  * into the flow.
408  *
409  * @param[in] item
410  *   Item specification.
411  * @param[in, out] item_flags
412  *   Bit mask that marks all detected items.
413  * @param[in, out] dev_flow
414  *   Pointer to dev_flow structure.
415  */
416 static void
417 flow_verbs_translate_item_udp(const struct rte_flow_item *item,
418                               uint64_t *item_flags,
419                               struct mlx5_flow *dev_flow)
420 {
421         const struct rte_flow_item_udp *spec = item->spec;
422         const struct rte_flow_item_udp *mask = item->mask;
423         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
424         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
425         struct ibv_flow_spec_tcp_udp udp = {
426                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
427                 .size = size,
428         };
429
430         if (!mask)
431                 mask = &rte_flow_item_udp_mask;
432         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
433                                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
434         if (spec) {
435                 udp.val.dst_port = spec->hdr.dst_port;
436                 udp.val.src_port = spec->hdr.src_port;
437                 udp.mask.dst_port = mask->hdr.dst_port;
438                 udp.mask.src_port = mask->hdr.src_port;
439                 /* Remove unwanted bits from values. */
440                 udp.val.src_port &= udp.mask.src_port;
441                 udp.val.dst_port &= udp.mask.dst_port;
442         }
443         dev_flow->verbs.hash_fields |=
444                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
445                                             (IBV_RX_HASH_SRC_PORT_UDP |
446                                              IBV_RX_HASH_DST_PORT_UDP));
447         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
448         flow_verbs_spec_add(dev_flow, &udp, size);
449 }
450
451 /**
452  * Convert the @p item into a Verbs specification. This function assumes that
453  * the input is valid and that there is space to insert the requested item
454  * into the flow.
455  *
456  * @param[in] item
457  *   Item specification.
458  * @param[in, out] item_flags
459  *   Bit mask that marks all detected items.
460  * @param[in, out] dev_flow
461  *   Pointer to dev_flow structure.
462  */
463 static void
464 flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
465                               uint64_t *item_flags,
466                               struct mlx5_flow *dev_flow)
467 {
468         const struct rte_flow_item_tcp *spec = item->spec;
469         const struct rte_flow_item_tcp *mask = item->mask;
470         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
471         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
472         struct ibv_flow_spec_tcp_udp tcp = {
473                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
474                 .size = size,
475         };
476
477         if (!mask)
478                 mask = &rte_flow_item_tcp_mask;
479         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
480                                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
481         if (spec) {
482                 tcp.val.dst_port = spec->hdr.dst_port;
483                 tcp.val.src_port = spec->hdr.src_port;
484                 tcp.mask.dst_port = mask->hdr.dst_port;
485                 tcp.mask.src_port = mask->hdr.src_port;
486                 /* Remove unwanted bits from values. */
487                 tcp.val.src_port &= tcp.mask.src_port;
488                 tcp.val.dst_port &= tcp.mask.dst_port;
489         }
490         dev_flow->verbs.hash_fields |=
491                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
492                                             (IBV_RX_HASH_SRC_PORT_TCP |
493                                              IBV_RX_HASH_DST_PORT_TCP));
494         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
495         flow_verbs_spec_add(dev_flow, &tcp, size);
496 }
497
498 /**
499  * Convert the @p item into a Verbs specification. This function assumes that
500  * the input is valid and that there is space to insert the requested item
501  * into the flow.
502  *
503  * @param[in] item
504  *   Item specification.
505  * @param[in, out] item_flags
506  *   Bit mask that marks all detected items.
507  * @param[in, out] dev_flow
508  *   Pointer to dev_flow structure.
509  */
510 static void
511 flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
512                                 uint64_t *item_flags,
513                                 struct mlx5_flow *dev_flow)
514 {
515         const struct rte_flow_item_vxlan *spec = item->spec;
516         const struct rte_flow_item_vxlan *mask = item->mask;
517         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
518         struct ibv_flow_spec_tunnel vxlan = {
519                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
520                 .size = size,
521         };
522         union vni {
523                 uint32_t vlan_id;
524                 uint8_t vni[4];
525         } id = { .vlan_id = 0, };
526
527         if (!mask)
528                 mask = &rte_flow_item_vxlan_mask;
529         if (spec) {
530                 memcpy(&id.vni[1], spec->vni, 3);
531                 vxlan.val.tunnel_id = id.vlan_id;
532                 memcpy(&id.vni[1], mask->vni, 3);
533                 vxlan.mask.tunnel_id = id.vlan_id;
534                 /* Remove unwanted bits from values. */
535                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
536         }
537         flow_verbs_spec_add(dev_flow, &vxlan, size);
538         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
539         *item_flags |= MLX5_FLOW_LAYER_VXLAN;
540 }
541
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in] item
548  *   Item specification.
549  * @param[in, out] item_flags
550  *   Bit mask that marks all detected items.
551  * @param[in, out] dev_flow
552  *   Pointer to dev_flow structure.
553  */
554 static void
555 flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
556                                     uint64_t *item_flags,
557                                     struct mlx5_flow *dev_flow)
558 {
559         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
560         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
561         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
562         struct ibv_flow_spec_tunnel vxlan_gpe = {
563                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
564                 .size = size,
565         };
566         union vni {
567                 uint32_t vlan_id;
568                 uint8_t vni[4];
569         } id = { .vlan_id = 0, };
570
571         if (!mask)
572                 mask = &rte_flow_item_vxlan_gpe_mask;
573         if (spec) {
574                 memcpy(&id.vni[1], spec->vni, 3);
575                 vxlan_gpe.val.tunnel_id = id.vlan_id;
576                 memcpy(&id.vni[1], mask->vni, 3);
577                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
578                 /* Remove unwanted bits from values. */
579                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
580         }
581         flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
582         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
583         *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
584 }
585
586 /**
587  * Update the protocol in Verbs IPv4/IPv6 spec.
588  *
589  * @param[in, out] attr
590  *   Pointer to Verbs attributes structure.
591  * @param[in] search
592  *   Specification type to search in order to update the IP protocol.
593  * @param[in] protocol
594  *   Protocol value to set if none is present in the specification.
595  */
596 static void
597 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
598                                        enum ibv_flow_spec_type search,
599                                        uint8_t protocol)
600 {
601         unsigned int i;
602         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
603                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
604
605         if (!attr)
606                 return;
607         for (i = 0; i != attr->num_of_specs; ++i) {
608                 if (hdr->type == search) {
609                         union {
610                                 struct ibv_flow_spec_ipv4_ext *ipv4;
611                                 struct ibv_flow_spec_ipv6 *ipv6;
612                         } ip;
613
614                         switch (search) {
615                         case IBV_FLOW_SPEC_IPV4_EXT:
616                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
617                                 if (!ip.ipv4->val.proto) {
618                                         ip.ipv4->val.proto = protocol;
619                                         ip.ipv4->mask.proto = 0xff;
620                                 }
621                                 break;
622                         case IBV_FLOW_SPEC_IPV6:
623                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
624                                 if (!ip.ipv6->val.next_hdr) {
625                                         ip.ipv6->val.next_hdr = protocol;
626                                         ip.ipv6->mask.next_hdr = 0xff;
627                                 }
628                                 break;
629                         default:
630                                 break;
631                         }
632                         break;
633                 }
634                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
635         }
636 }
637
638 /**
639  * Convert the @p item into a Verbs specification. This function assumes that
640  * the input is valid and that there is space to insert the requested item
641  * into the flow.
642  *
643  * @param[in] item
644  *   Item specification.
645  * @param[in, out] item_flags
646  *   Bit mask that marks all detected items.
647  * @param[in, out] dev_flow
648  *   Pointer to dev_flow structure.
649  */
650 static void
651 flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
652                               uint64_t *item_flags,
653                               struct mlx5_flow *dev_flow)
654 {
655         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
656 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
657         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
658         struct ibv_flow_spec_tunnel tunnel = {
659                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
660                 .size = size,
661         };
662 #else
663         const struct rte_flow_item_gre *spec = item->spec;
664         const struct rte_flow_item_gre *mask = item->mask;
665         unsigned int size = sizeof(struct ibv_flow_spec_gre);
666         struct ibv_flow_spec_gre tunnel = {
667                 .type = IBV_FLOW_SPEC_GRE,
668                 .size = size,
669         };
670
671         if (!mask)
672                 mask = &rte_flow_item_gre_mask;
673         if (spec) {
674                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
675                 tunnel.val.protocol = spec->protocol;
676                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
677                 tunnel.mask.protocol = mask->protocol;
678                 /* Remove unwanted bits from values. */
679                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
680                 tunnel.val.protocol &= tunnel.mask.protocol;
681                 tunnel.val.key &= tunnel.mask.key;
682         }
683 #endif
684         if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
685                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
686                                                        IBV_FLOW_SPEC_IPV4_EXT,
687                                                        IPPROTO_GRE);
688         else
689                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
690                                                        IBV_FLOW_SPEC_IPV6,
691                                                        IPPROTO_GRE);
692         flow_verbs_spec_add(dev_flow, &tunnel, size);
693         verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
694         *item_flags |= MLX5_FLOW_LAYER_GRE;
695 }
696
697 /**
698  * Convert the @p item into a Verbs specification. This function assumes that
699  * the input is valid and that there is space to insert the requested item
700  * into the flow. The detected layer is also recorded in @p item_flags.
701  *
702  * @param[in] item
703  *   Item specification.
704  * @param[in, out] item_flags
705  *   Bit mask that marks all detected items.
706  * @param[in, out] dev_flow
707  *   Pointer to dev_flow structure.
708  */
709 static void
710 flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
711                                uint64_t *item_flags __rte_unused,
712                                struct mlx5_flow *dev_flow __rte_unused)
713 {
714 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
715         const struct rte_flow_item_mpls *spec = item->spec;
716         const struct rte_flow_item_mpls *mask = item->mask;
717         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
718         struct ibv_flow_spec_mpls mpls = {
719                 .type = IBV_FLOW_SPEC_MPLS,
720                 .size = size,
721         };
722
723         if (!mask)
724                 mask = &rte_flow_item_mpls_mask;
725         if (spec) {
726                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
727                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
728                 /* Remove unwanted bits from values.  */
729                 mpls.val.label &= mpls.mask.label;
730         }
731         flow_verbs_spec_add(dev_flow, &mpls, size);
732         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
733         *item_flags |= MLX5_FLOW_LAYER_MPLS;
734 #endif
735 }
736
737 /**
738  * Convert the @p action into a Verbs specification. This function assumes that
739  * the input is valid and that there is space to insert the requested action
740  * into the flow. The detected action is also recorded in @p action_flags.
741  *
742  * @param[in, out] action_flags
743  *   Pointer to the detected actions.
744  * @param[in] dev_flow
745  *   Pointer to mlx5_flow.
746  */
747 static void
748 flow_verbs_translate_action_drop(uint64_t *action_flags,
749                                  struct mlx5_flow *dev_flow)
750 {
751         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
752         struct ibv_flow_spec_action_drop drop = {
753                         .type = IBV_FLOW_SPEC_ACTION_DROP,
754                         .size = size,
755         };
756
757         flow_verbs_spec_add(dev_flow, &drop, size);
758         *action_flags |= MLX5_FLOW_ACTION_DROP;
759 }
760
761 /**
762  * Convert the @p action into a Verbs specification. This function assumes that
763  * the input is valid and that there is space to insert the requested action
764  * into the flow. The detected action is also recorded in @p action_flags.
765  *
766  * @param[in] action
767  *   Action configuration.
768  * @param[in, out] action_flags
769  *   Pointer to the detected actions.
770  * @param[in] dev_flow
771  *   Pointer to mlx5_flow.
772  */
773 static void
774 flow_verbs_translate_action_queue(const struct rte_flow_action *action,
775                                   uint64_t *action_flags,
776                                   struct mlx5_flow *dev_flow)
777 {
778         const struct rte_flow_action_queue *queue = action->conf;
779         struct rte_flow *flow = dev_flow->flow;
780
781         if (flow->queue)
782                 (*flow->queue)[0] = queue->index;
783         flow->rss.queue_num = 1;
784         *action_flags |= MLX5_FLOW_ACTION_QUEUE;
785 }
786
787 /**
788  * Convert the @p action into a Verbs specification. This function assumes that
789  * the input is valid and that there is space to insert the requested action
790  * into the flow. The detected action is also recorded in @p action_flags.
791  *
792  * @param[in] action
793  *   Action configuration.
794  * @param[in, out] action_flags
795  *   Pointer to the detected actions.
796  * @param[in] dev_flow
797  *   Pointer to mlx5_flow.
798  */
799 static void
800 flow_verbs_translate_action_rss(const struct rte_flow_action *action,
801                                 uint64_t *action_flags,
802                                 struct mlx5_flow *dev_flow)
803 {
804         const struct rte_flow_action_rss *rss = action->conf;
805         struct rte_flow *flow = dev_flow->flow;
806
807         if (flow->queue)
808                 memcpy((*flow->queue), rss->queue,
809                        rss->queue_num * sizeof(uint16_t));
810         flow->rss.queue_num = rss->queue_num;
811         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
812         flow->rss.types = rss->types;
813         flow->rss.level = rss->level;
814         *action_flags |= MLX5_FLOW_ACTION_RSS;
815 }
816
817 /**
818  * Convert the @p action into a Verbs specification. This function assumes that
819  * the input is valid and that there is space to insert the requested action
820  * into the flow. The detected action is also recorded in @p action_flags.
821  *
822  * @param[in] action
823  *   Action configuration.
824  * @param[in, out] action_flags
825  *   Pointer to the detected actions.
826  * @param[in] dev_flow
827  *   Pointer to mlx5_flow.
828  */
829 static void
830 flow_verbs_translate_action_flag
831                         (const struct rte_flow_action *action __rte_unused,
832                          uint64_t *action_flags,
833                          struct mlx5_flow *dev_flow)
834 {
835         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
836         struct ibv_flow_spec_action_tag tag = {
837                 .type = IBV_FLOW_SPEC_ACTION_TAG,
838                 .size = size,
839                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
840         };
841         *action_flags |= MLX5_FLOW_ACTION_MARK;
842         flow_verbs_spec_add(dev_flow, &tag, size);
843 }
844
845 /**
846  * Update verbs specification to modify the flag to mark.
847  *
848  * @param[in, out] verbs
849  *   Pointer to the mlx5_flow_verbs structure.
850  * @param[in] mark_id
851  *   Mark identifier to replace the flag.
852  */
853 static void
854 flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
855 {
856         struct ibv_spec_header *hdr;
857         int i;
858
859         if (!verbs)
860                 return;
861         /* Update Verbs specification. */
862         hdr = (struct ibv_spec_header *)verbs->specs;
863         if (!hdr)
864                 return;
865         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
866                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
867                         struct ibv_flow_spec_action_tag *t =
868                                 (struct ibv_flow_spec_action_tag *)hdr;
869
870                         t->tag_id = mlx5_flow_mark_set(mark_id);
871                 }
872                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
873         }
874 }
875
876 /**
877  * Convert the @p action into a Verbs specification. This function assumes that
878  * the input is valid and that there is space to insert the requested action
879  * into the flow. The detected action is also recorded in @p action_flags.
880  *
881  * @param[in] action
882  *   Action configuration.
883  * @param[in, out] action_flags
884  *   Pointer to the detected actions.
885  * @param[in] dev_flow
886  *   Pointer to mlx5_flow.
887  */
888 static void
889 flow_verbs_translate_action_mark(const struct rte_flow_action *action,
890                                  uint64_t *action_flags,
891                                  struct mlx5_flow *dev_flow)
892 {
893         const struct rte_flow_action_mark *mark = action->conf;
894         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
895         struct ibv_flow_spec_action_tag tag = {
896                 .type = IBV_FLOW_SPEC_ACTION_TAG,
897                 .size = size,
898         };
899         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
900
901         if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
902                 flow_verbs_mark_update(verbs, mark->id);
903                 size = 0;
904         } else {
905                 tag.tag_id = mlx5_flow_mark_set(mark->id);
906                 flow_verbs_spec_add(dev_flow, &tag, size);
907         }
908         *action_flags |= MLX5_FLOW_ACTION_MARK;
909 }
910
911 /**
912  * Convert the @p action into a Verbs specification. This function assumes that
913  * the input is valid and that there is space to insert the requested action
914  * into the flow. The detected action is also recorded in @p action_flags.
915  *
916  * @param[in] dev
917  *   Pointer to the Ethernet device structure.
918  * @param[in] action
919  *   Action configuration.
920  * @param[in, out] action_flags
921  *   Pointer to the detected actions.
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[out] error
925  *   Pointer to error structure.
926  *
927  * @return
928  *   0 on success, a negative errno value otherwise and rte_errno is set.
929  */
930 static int
931 flow_verbs_translate_action_count(struct rte_eth_dev *dev,
932                                   const struct rte_flow_action *action,
933                                   uint64_t *action_flags,
934                                   struct mlx5_flow *dev_flow,
935                                   struct rte_flow_error *error)
936 {
937         const struct rte_flow_action_count *count = action->conf;
938         struct rte_flow *flow = dev_flow->flow;
939 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
940         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
941         struct ibv_flow_spec_counter_action counter = {
942                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
943                 .size = size,
944         };
945 #endif
946
947         if (!flow->counter) {
948                 flow->counter = flow_verbs_counter_new(dev, count->shared,
949                                                        count->id);
950                 if (!flow->counter)
951                         return rte_flow_error_set(error, rte_errno,
952                                                   RTE_FLOW_ERROR_TYPE_ACTION,
953                                                   action,
954                                                   "cannot get counter"
955                                                   " context.");
956         }
957         *action_flags |= MLX5_FLOW_ACTION_COUNT;
958 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
959         counter.counter_set_handle = flow->counter->cs->handle;
960         flow_verbs_spec_add(dev_flow, &counter, size);
961 #endif
962         return 0;
963 }
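
/*
 * Illustrative sketch only, not part of the driver: the rte_flow COUNT action
 * configuration that flow_verbs_translate_action_count() consumes.  The
 * variable name and the values are hypothetical.
 */
static const struct rte_flow_action_count flow_verbs_count_conf_sketch
        __rte_unused = {
        .shared = 1, /* reuse the counter between flows sharing the same id */
        .id = 42,    /* user-chosen counter identifier */
};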
964
965 /**
966  * Internal validation function. For validating both actions and items.
967  *
968  * @param[in] dev
969  *   Pointer to the Ethernet device structure.
970  * @param[in] attr
971  *   Pointer to the flow attributes.
972  * @param[in] items
973  *   Pointer to the list of items.
974  * @param[in] actions
975  *   Pointer to the list of actions.
976  * @param[out] error
977  *   Pointer to the error structure.
978  *
979  * @return
980  *   0 on success, a negative errno value otherwise and rte_errno is set.
981  */
982 static int
983 flow_verbs_validate(struct rte_eth_dev *dev,
984                     const struct rte_flow_attr *attr,
985                     const struct rte_flow_item items[],
986                     const struct rte_flow_action actions[],
987                     struct rte_flow_error *error)
988 {
989         int ret;
990         uint32_t action_flags = 0;
991         uint32_t item_flags = 0;
992         int tunnel = 0;
993         uint8_t next_protocol = 0xff;
994
995         if (items == NULL)
996                 return -1;
997         ret = mlx5_flow_validate_attributes(dev, attr, error);
998         if (ret < 0)
999                 return ret;
1000         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1001                 int ret = 0;
1002                 switch (items->type) {
1003                 case RTE_FLOW_ITEM_TYPE_VOID:
1004                         break;
1005                 case RTE_FLOW_ITEM_TYPE_ETH:
1006                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1007                                                           error);
1008                         if (ret < 0)
1009                                 return ret;
1010                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1011                                                MLX5_FLOW_LAYER_OUTER_L2;
1012                         break;
1013                 case RTE_FLOW_ITEM_TYPE_VLAN:
1014                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1015                                                            error);
1016                         if (ret < 0)
1017                                 return ret;
1018                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1019                                                MLX5_FLOW_LAYER_OUTER_VLAN;
1020                         break;
1021                 case RTE_FLOW_ITEM_TYPE_IPV4:
1022                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1023                                                            error);
1024                         if (ret < 0)
1025                                 return ret;
1026                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1027                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1028                         if (items->mask != NULL &&
1029                             ((const struct rte_flow_item_ipv4 *)
1030                              items->mask)->hdr.next_proto_id)
1031                                 next_protocol =
1032                                         ((const struct rte_flow_item_ipv4 *)
1033                                          (items->spec))->hdr.next_proto_id;
1034                         break;
1035                 case RTE_FLOW_ITEM_TYPE_IPV6:
1036                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1037                                                            error);
1038                         if (ret < 0)
1039                                 return ret;
1040                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1041                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1042                         if (items->mask != NULL &&
1043                             ((const struct rte_flow_item_ipv6 *)
1044                              items->mask)->hdr.proto)
1045                                 next_protocol =
1046                                         ((const struct rte_flow_item_ipv6 *)
1047                                          items->spec)->hdr.proto;
1048                         break;
1049                 case RTE_FLOW_ITEM_TYPE_UDP:
1050                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1051                                                           next_protocol,
1052                                                           error);
1053                         if (ret < 0)
1054                                 return ret;
1055                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1056                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1057                         break;
1058                 case RTE_FLOW_ITEM_TYPE_TCP:
1059                         ret = mlx5_flow_validate_item_tcp
1060                                                 (items, item_flags,
1061                                                  next_protocol,
1062                                                  &rte_flow_item_tcp_mask,
1063                                                  error);
1064                         if (ret < 0)
1065                                 return ret;
1066                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1067                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1068                         break;
1069                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1070                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1071                                                             error);
1072                         if (ret < 0)
1073                                 return ret;
1074                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1075                         break;
1076                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1077                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1078                                                                 item_flags,
1079                                                                 dev, error);
1080                         if (ret < 0)
1081                                 return ret;
1082                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1083                         break;
1084                 case RTE_FLOW_ITEM_TYPE_GRE:
1085                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1086                                                           next_protocol, error);
1087                         if (ret < 0)
1088                                 return ret;
1089                         item_flags |= MLX5_FLOW_LAYER_GRE;
1090                         break;
1091                 case RTE_FLOW_ITEM_TYPE_MPLS:
1092                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
1093                                                            next_protocol,
1094                                                            error);
1095                         if (ret < 0)
1096                                 return ret;
1097                         if (next_protocol != 0xff &&
1098                             next_protocol != IPPROTO_MPLS)
1099                                 return rte_flow_error_set
1100                                         (error, EINVAL,
1101                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1102                                          "protocol filtering not compatible"
1103                                          " with MPLS layer");
1104                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1105                         break;
1106                 default:
1107                         return rte_flow_error_set(error, ENOTSUP,
1108                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1109                                                   NULL, "item not supported");
1110                 }
1111         }
1112         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1113                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1114                 switch (actions->type) {
1115                 case RTE_FLOW_ACTION_TYPE_VOID:
1116                         break;
1117                 case RTE_FLOW_ACTION_TYPE_FLAG:
1118                         ret = mlx5_flow_validate_action_flag(action_flags,
1119                                                              attr,
1120                                                              error);
1121                         if (ret < 0)
1122                                 return ret;
1123                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1124                         break;
1125                 case RTE_FLOW_ACTION_TYPE_MARK:
1126                         ret = mlx5_flow_validate_action_mark(actions,
1127                                                              action_flags,
1128                                                              attr,
1129                                                              error);
1130                         if (ret < 0)
1131                                 return ret;
1132                         action_flags |= MLX5_FLOW_ACTION_MARK;
1133                         break;
1134                 case RTE_FLOW_ACTION_TYPE_DROP:
1135                         ret = mlx5_flow_validate_action_drop(action_flags,
1136                                                              attr,
1137                                                              error);
1138                         if (ret < 0)
1139                                 return ret;
1140                         action_flags |= MLX5_FLOW_ACTION_DROP;
1141                         break;
1142                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1143                         ret = mlx5_flow_validate_action_queue(actions,
1144                                                               action_flags, dev,
1145                                                               attr,
1146                                                               error);
1147                         if (ret < 0)
1148                                 return ret;
1149                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1150                         break;
1151                 case RTE_FLOW_ACTION_TYPE_RSS:
1152                         ret = mlx5_flow_validate_action_rss(actions,
1153                                                             action_flags, dev,
1154                                                             attr,
1155                                                             error);
1156                         if (ret < 0)
1157                                 return ret;
1158                         action_flags |= MLX5_FLOW_ACTION_RSS;
1159                         break;
1160                 case RTE_FLOW_ACTION_TYPE_COUNT:
1161                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1162                         if (ret < 0)
1163                                 return ret;
1164                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1165                         break;
1166                 default:
1167                         return rte_flow_error_set(error, ENOTSUP,
1168                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1169                                                   actions,
1170                                                   "action not supported");
1171                 }
1172         }
1173         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1174                 return rte_flow_error_set(error, EINVAL,
1175                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1176                                           "no fate action is found");
1177         return 0;
1178 }
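
/*
 * Illustrative sketch only, not part of the driver: a minimal pattern/action
 * list that flow_verbs_validate() is meant to accept (eth / ipv4 / tcp ->
 * queue 0).  The function name is hypothetical.
 */
static int __rte_unused
flow_verbs_validate_usage_sketch(struct rte_eth_dev *dev,
                                 struct rte_flow_error *error)
{
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item items[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_TCP },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action_queue queue = { .index = 0 };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        return flow_verbs_validate(dev, &attr, items, actions, error);
}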
1179
1180 /**
1181  * Calculate the amount of memory needed for the action part of the Verbs
1182  * flow. In addition, it returns a bit-field with all the detected actions,
1183  * in order to avoid another iteration over the actions.
1184  *
1185  * @param[in] actions
1186  *   Pointer to the list of actions.
1187  * @param[out] action_flags
1188  *   Pointer to the detected actions.
1189  *
1190  * @return
1191  *   The size of the memory needed for all actions.
1192  */
1193 static int
1194 flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
1195                                 uint64_t *action_flags)
1196 {
1197         int size = 0;
1198         uint64_t detected_actions = 0;
1199
1200         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1201                 switch (actions->type) {
1202                 case RTE_FLOW_ACTION_TYPE_VOID:
1203                         break;
1204                 case RTE_FLOW_ACTION_TYPE_FLAG:
1205                         size += sizeof(struct ibv_flow_spec_action_tag);
1206                         detected_actions |= MLX5_FLOW_ACTION_FLAG;
1207                         break;
1208                 case RTE_FLOW_ACTION_TYPE_MARK:
1209                         size += sizeof(struct ibv_flow_spec_action_tag);
1210                         detected_actions |= MLX5_FLOW_ACTION_MARK;
1211                         break;
1212                 case RTE_FLOW_ACTION_TYPE_DROP:
1213                         size += sizeof(struct ibv_flow_spec_action_drop);
1214                         detected_actions |= MLX5_FLOW_ACTION_DROP;
1215                         break;
1216                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1217                         detected_actions |= MLX5_FLOW_ACTION_QUEUE;
1218                         break;
1219                 case RTE_FLOW_ACTION_TYPE_RSS:
1220                         detected_actions |= MLX5_FLOW_ACTION_RSS;
1221                         break;
1222                 case RTE_FLOW_ACTION_TYPE_COUNT:
1223 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1224                         size += sizeof(struct ibv_flow_spec_counter_action);
1225 #endif
1226                         detected_actions |= MLX5_FLOW_ACTION_COUNT;
1227                         break;
1228                 default:
1229                         break;
1230                 }
1231         }
1232         *action_flags = detected_actions;
1233         return size;
1234 }
1235
1236 /**
1237  * Calculate the amount of memory needed for the item part of the Verbs
1238  * flow. In addition, it returns a bit-field with all the detected items,
1239  * in order to avoid another iteration over the items.
1240  *
1241  * @param[in] items
1242  *   Pointer to the list of items.
1243  * @param[in, out] item_flags
1244  *   Pointer to the detected items.
1245  *
1246  * @return
1247  *   The size of the memory needed for all items.
1248  */
1249 static int
1250 flow_verbs_get_items_and_size(const struct rte_flow_item items[],
1251                               uint64_t *item_flags)
1252 {
1253         int size = 0;
1254         uint64_t detected_items = 0;
1255         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
1256
1257         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1258                 switch (items->type) {
1259                 case RTE_FLOW_ITEM_TYPE_VOID:
1260                         break;
1261                 case RTE_FLOW_ITEM_TYPE_ETH:
1262                         size += sizeof(struct ibv_flow_spec_eth);
1263                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1264                                                    MLX5_FLOW_LAYER_OUTER_L2;
1265                         break;
1266                 case RTE_FLOW_ITEM_TYPE_VLAN:
1267                         size += sizeof(struct ibv_flow_spec_eth);
1268                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1269                                                    MLX5_FLOW_LAYER_OUTER_VLAN;
1270                         break;
1271                 case RTE_FLOW_ITEM_TYPE_IPV4:
1272                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1273                         detected_items |= tunnel ?
1274                                           MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1275                                           MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1276                         break;
1277                 case RTE_FLOW_ITEM_TYPE_IPV6:
1278                         size += sizeof(struct ibv_flow_spec_ipv6);
1279                         detected_items |= tunnel ?
1280                                           MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1281                                           MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1282                         break;
1283                 case RTE_FLOW_ITEM_TYPE_UDP:
1284                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1285                         detected_items |= tunnel ?
1286                                           MLX5_FLOW_LAYER_INNER_L4_UDP :
1287                                           MLX5_FLOW_LAYER_OUTER_L4_UDP;
1288                         break;
1289                 case RTE_FLOW_ITEM_TYPE_TCP:
1290                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1291                         detected_items |= tunnel ?
1292                                           MLX5_FLOW_LAYER_INNER_L4_TCP :
1293                                           MLX5_FLOW_LAYER_OUTER_L4_TCP;
1294                         break;
1295                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1296                         size += sizeof(struct ibv_flow_spec_tunnel);
1297                         detected_items |= MLX5_FLOW_LAYER_VXLAN;
1298                         break;
1299                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1300                         size += sizeof(struct ibv_flow_spec_tunnel);
1301                         detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
1302                         break;
1303 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1304                 case RTE_FLOW_ITEM_TYPE_GRE:
1305                         size += sizeof(struct ibv_flow_spec_gre);
1306                         detected_items |= MLX5_FLOW_LAYER_GRE;
1307                         break;
1308                 case RTE_FLOW_ITEM_TYPE_MPLS:
1309                         size += sizeof(struct ibv_flow_spec_mpls);
1310                         detected_items |= MLX5_FLOW_LAYER_MPLS;
1311                         break;
1312 #else
1313                 case RTE_FLOW_ITEM_TYPE_GRE:
1314                         size += sizeof(struct ibv_flow_spec_tunnel);
1315                         detected_items |= MLX5_FLOW_LAYER_TUNNEL;
1316                         break;
1317 #endif
1318                 default:
1319                         break;
1320                 }
1321         }
1322         *item_flags = detected_items;
1323         return size;
1324 }
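/*
 * Editor's note -- illustrative sketch only, not part of the driver.
 * For a plain ETH / IPV4 / UDP pattern, with MLX5_FLOW_LAYER_TUNNEL not
 * set in the incoming *item_flags, the helper above accounts for the
 * outer layers only:
 *
 *      const struct rte_flow_item items[] = {
 *              { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *              { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *              { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *              { .type = RTE_FLOW_ITEM_TYPE_END },
 *      };
 *      uint64_t flags = 0;
 *      int size = flow_verbs_get_items_and_size(items, &flags);
 *      // size  == sizeof(struct ibv_flow_spec_eth) +
 *      //          sizeof(struct ibv_flow_spec_ipv4_ext) +
 *      //          sizeof(struct ibv_flow_spec_tcp_udp)
 *      // flags == MLX5_FLOW_LAYER_OUTER_L2 |
 *      //          MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *      //          MLX5_FLOW_LAYER_OUTER_L4_UDP
 */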
1325
1326 /**
1327  * Internal preparation function. Allocate mlx5_flow with the required size.
1328  * The required size is calculated based on the actions and items. This function
1329  * also returns the detected actions and items for later use.
1330  *
1331  * @param[in] attr
1332  *   Pointer to the flow attributes.
1333  * @param[in] items
1334  *   Pointer to the list of items.
1335  * @param[in] actions
1336  *   Pointer to the list of actions.
1337  * @param[out] item_flags
1338  *   Pointer to bit mask of all items detected.
1339  * @param[out] action_flags
1340  *   Pointer to bit mask of all actions detected.
1341  * @param[out] error
1342  *   Pointer to the error structure.
1343  *
1344  * @return
1345  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1346  *   is set.
1347  */
1348 static struct mlx5_flow *
1349 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1350                    const struct rte_flow_item items[],
1351                    const struct rte_flow_action actions[],
1352                    uint64_t *item_flags,
1353                    uint64_t *action_flags,
1354                    struct rte_flow_error *error)
1355 {
1356         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1357         struct mlx5_flow *flow;
1358
1359         size += flow_verbs_get_actions_and_size(actions, action_flags);
1360         size += flow_verbs_get_items_and_size(items, item_flags);
1361         flow = rte_calloc(__func__, 1, size, 0);
1362         if (!flow) {
1363                 rte_flow_error_set(error, ENOMEM,
1364                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1365                                    "not enough memory to create flow");
1366                 return NULL;
1367         }
1368         flow->verbs.attr = (void *)(flow + 1);
1369         flow->verbs.specs =
1370                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1371         return flow;
1372 }
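/*
 * Editor's note -- layout sketch only.  flow_verbs_prepare() makes a
 * single allocation and carves it into three consecutive regions; the
 * specs region is still empty at this point and is filled in later by
 * flow_verbs_translate():
 *
 *      +-------------------+----------------------+---------------------+
 *      | struct mlx5_flow  | struct ibv_flow_attr | verbs specs         |
 *      | (returned ptr)    | (flow->verbs.attr)   | (flow->verbs.specs) |
 *      +-------------------+----------------------+---------------------+
 *                                                  sized by the two
 *                                                  *_get_*_and_size()
 *                                                  helpers above
 */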
1373
1374 /**
1375  * Fill the flow with the verbs specifications.
1376  *
1377  * @param[in] dev
1378  *   Pointer to Ethernet device.
1379  * @param[in, out] dev_flow
1380  *   Pointer to the mlx5 flow.
1381  * @param[in] attr
1382  *   Pointer to the flow attributes.
1383  * @param[in] items
1384  *   Pointer to the list of items.
1385  * @param[in] actions
1386  *   Pointer to the list of actions.
1387  * @param[out] error
1388  *   Pointer to the error structure.
1389  *
1390  * @return
1391  *   0 on success, a negative errno value otherwise and rte_errno is set.
1392  */
1393 static int
1394 flow_verbs_translate(struct rte_eth_dev *dev,
1395                      struct mlx5_flow *dev_flow,
1396                      const struct rte_flow_attr *attr,
1397                      const struct rte_flow_item items[],
1398                      const struct rte_flow_action actions[],
1399                      struct rte_flow_error *error)
1400 {
1401         uint64_t action_flags = 0;
1402         uint64_t item_flags = 0;
1403         uint64_t priority = attr->priority;
1404         struct priv *priv = dev->data->dev_private;
1405
1406         if (priority == MLX5_FLOW_PRIO_RSVD)
1407                 priority = priv->config.flow_prio - 1;
1408         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1409                 int ret;
1410                 switch (actions->type) {
1411                 case RTE_FLOW_ACTION_TYPE_VOID:
1412                         break;
1413                 case RTE_FLOW_ACTION_TYPE_FLAG:
1414                         flow_verbs_translate_action_flag(actions,
1415                                                          &action_flags,
1416                                                          dev_flow);
1417                         break;
1418                 case RTE_FLOW_ACTION_TYPE_MARK:
1419                         flow_verbs_translate_action_mark(actions,
1420                                                          &action_flags,
1421                                                          dev_flow);
1422                         break;
1423                 case RTE_FLOW_ACTION_TYPE_DROP:
1424                         flow_verbs_translate_action_drop(&action_flags,
1425                                                          dev_flow);
1426                         break;
1427                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1428                         flow_verbs_translate_action_queue(actions,
1429                                                           &action_flags,
1430                                                           dev_flow);
1431                         break;
1432                 case RTE_FLOW_ACTION_TYPE_RSS:
1433                         flow_verbs_translate_action_rss(actions,
1434                                                         &action_flags,
1435                                                         dev_flow);
1436                         break;
1437                 case RTE_FLOW_ACTION_TYPE_COUNT:
1438                         ret = flow_verbs_translate_action_count(dev,
1439                                                                 actions,
1440                                                                 &action_flags,
1441                                                                 dev_flow,
1442                                                                 error);
1443                         if (ret < 0)
1444                                 return ret;
1445                         break;
1446                 default:
1447                         return rte_flow_error_set(error, ENOTSUP,
1448                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1449                                                   actions,
1450                                                   "action not supported");
1451                 }
1452         }
1453         dev_flow->flow->actions |= action_flags;
1454         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1455                 switch (items->type) {
1456                 case RTE_FLOW_ITEM_TYPE_VOID:
1457                         break;
1458                 case RTE_FLOW_ITEM_TYPE_ETH:
1459                         flow_verbs_translate_item_eth(items, &item_flags,
1460                                                       dev_flow);
1461                         break;
1462                 case RTE_FLOW_ITEM_TYPE_VLAN:
1463                         flow_verbs_translate_item_vlan(items, &item_flags,
1464                                                        dev_flow);
1465                         break;
1466                 case RTE_FLOW_ITEM_TYPE_IPV4:
1467                         flow_verbs_translate_item_ipv4(items, &item_flags,
1468                                                        dev_flow);
1469                         break;
1470                 case RTE_FLOW_ITEM_TYPE_IPV6:
1471                         flow_verbs_translate_item_ipv6(items, &item_flags,
1472                                                        dev_flow);
1473                         break;
1474                 case RTE_FLOW_ITEM_TYPE_UDP:
1475                         flow_verbs_translate_item_udp(items, &item_flags,
1476                                                       dev_flow);
1477                         break;
1478                 case RTE_FLOW_ITEM_TYPE_TCP:
1479                         flow_verbs_translate_item_tcp(items, &item_flags,
1480                                                       dev_flow);
1481                         break;
1482                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1483                         flow_verbs_translate_item_vxlan(items, &item_flags,
1484                                                         dev_flow);
1485                         break;
1486                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1487                         flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
1488                                                             dev_flow);
1489                         break;
1490                 case RTE_FLOW_ITEM_TYPE_GRE:
1491                         flow_verbs_translate_item_gre(items, &item_flags,
1492                                                       dev_flow);
1493                         break;
1494                 case RTE_FLOW_ITEM_TYPE_MPLS:
1495                         flow_verbs_translate_item_mpls(items, &item_flags,
1496                                                        dev_flow);
1497                         break;
1498                 default:
1499                         return rte_flow_error_set(error, ENOTSUP,
1500                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1501                                                   NULL,
1502                                                   "item not supported");
1503                 }
1504         }
1505         dev_flow->verbs.attr->priority =
1506                 mlx5_flow_adjust_priority(dev, priority,
1507                                           dev_flow->verbs.attr->priority);
1508         return 0;
1509 }
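/*
 * Editor's note -- usage sketch only, not part of the driver.  A rule
 * built from the item and action types handled above reaches this
 * translation through the generic rte_flow API; e.g. steering ingress
 * ETH / IPV4 / TCP traffic to Rx queue 1 on port 0:
 *
 *      struct rte_flow_error err;
 *      struct rte_flow_attr attr = { .ingress = 1 };
 *      struct rte_flow_action_queue queue = { .index = 1 };
 *      struct rte_flow_item pattern[] = {
 *              { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *              { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *              { .type = RTE_FLOW_ITEM_TYPE_TCP },
 *              { .type = RTE_FLOW_ITEM_TYPE_END },
 *      };
 *      struct rte_flow_action actions[] = {
 *              { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *              { .type = RTE_FLOW_ACTION_TYPE_END },
 *      };
 *      struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions, &err);
 */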
1510
1511 /**
1512  * Remove the flow from the NIC but keep it in memory.
1513  *
1514  * @param[in] dev
1515  *   Pointer to the Ethernet device structure.
1516  * @param[in, out] flow
1517  *   Pointer to flow structure.
1518  */
1519 static void
1520 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1521 {
1522         struct mlx5_flow_verbs *verbs;
1523         struct mlx5_flow *dev_flow;
1524
1525         if (!flow)
1526                 return;
1527         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1528                 verbs = &dev_flow->verbs;
1529                 if (verbs->flow) {
1530                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1531                         verbs->flow = NULL;
1532                 }
1533                 if (verbs->hrxq) {
1534                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1535                                 mlx5_hrxq_drop_release(dev);
1536                         else
1537                                 mlx5_hrxq_release(dev, verbs->hrxq);
1538                         verbs->hrxq = NULL;
1539                 }
1540         }
1541         if (flow->counter) {
1542                 flow_verbs_counter_release(flow->counter);
1543                 flow->counter = NULL;
1544         }
1545 }
1546
1547 /**
1548  * Remove the flow from the NIC and the memory.
1549  *
1550  * @param[in] dev
1551  *   Pointer to the Ethernet device structure.
1552  * @param[in, out] flow
1553  *   Pointer to flow structure.
1554  */
1555 static void
1556 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1557 {
1558         struct mlx5_flow *dev_flow;
1559
1560         if (!flow)
1561                 return;
1562         flow_verbs_remove(dev, flow);
1563         while (!LIST_EMPTY(&flow->dev_flows)) {
1564                 dev_flow = LIST_FIRST(&flow->dev_flows);
1565                 LIST_REMOVE(dev_flow, next);
1566                 rte_free(dev_flow);
1567         }
1568 }
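/*
 * Editor's note -- the remove/destroy split lets the caller drop the
 * hardware state (ibv flow, hash Rx queue, counter) while keeping the
 * rte_flow and its dev_flows in memory, so the same rule can be
 * re-applied later.  A hedged sketch of the expected per-rule call
 * pattern from the generic layer (the generic layer iterates its own
 * flow list; only the per-rule calls are shown):
 *
 *      // device stop: release hardware resources, keep the rule
 *      flow_verbs_remove(dev, flow);
 *      // device start: re-create it from the kept verbs attributes
 *      flow_verbs_apply(dev, flow, &error);
 *      // rule deletion: release hardware resources and free the memory
 *      flow_verbs_destroy(dev, flow);
 */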
1569
1570 /**
1571  * Apply the flow to the NIC.
1572  *
1573  * @param[in] dev
1574  *   Pointer to the Ethernet device structure.
1575  * @param[in, out] flow
1576  *   Pointer to flow structure.
1577  * @param[out] error
1578  *   Pointer to error structure.
1579  *
1580  * @return
1581  *   0 on success, a negative errno value otherwise and rte_errno is set.
1582  */
1583 static int
1584 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1585                  struct rte_flow_error *error)
1586 {
1587         struct mlx5_flow_verbs *verbs;
1588         struct mlx5_flow *dev_flow;
1589         int err;
1590
1591         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1592                 verbs = &dev_flow->verbs;
1593                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1594                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1595                         if (!verbs->hrxq) {
1596                                 rte_flow_error_set
1597                                         (error, errno,
1598                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1599                                          "cannot get drop hash queue");
1600                                 goto error;
1601                         }
1602                 } else {
1603                         struct mlx5_hrxq *hrxq;
1604
1605                         hrxq = mlx5_hrxq_get(dev, flow->key,
1606                                              MLX5_RSS_HASH_KEY_LEN,
1607                                              verbs->hash_fields,
1608                                              (*flow->queue),
1609                                              flow->rss.queue_num);
1610                         if (!hrxq)
1611                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1612                                                      MLX5_RSS_HASH_KEY_LEN,
1613                                                      verbs->hash_fields,
1614                                                      (*flow->queue),
1615                                                      flow->rss.queue_num,
1616                                                      !!(flow->layers &
1617                                                       MLX5_FLOW_LAYER_TUNNEL));
1618                         if (!hrxq) {
1619                                 rte_flow_error_set
1620                                         (error, rte_errno,
1621                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1622                                          "cannot get hash queue");
1623                                 goto error;
1624                         }
1625                         verbs->hrxq = hrxq;
1626                 }
1627                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1628                                                      verbs->attr);
1629                 if (!verbs->flow) {
1630                         rte_flow_error_set(error, errno,
1631                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1632                                            NULL,
1633                                            "hardware refuses to create flow");
1634                         goto error;
1635                 }
1636         }
1637         return 0;
1638 error:
1639         err = rte_errno; /* Save rte_errno before cleanup. */
1640         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1641                 verbs = &dev_flow->verbs;
1642                 if (verbs->hrxq) {
1643                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1644                                 mlx5_hrxq_drop_release(dev);
1645                         else
1646                                 mlx5_hrxq_release(dev, verbs->hrxq);
1647                         verbs->hrxq = NULL;
1648                 }
1649         }
1650         rte_errno = err; /* Restore rte_errno. */
1651         return -rte_errno;
1652 }
1653
1654 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1655         .validate = flow_verbs_validate,
1656         .prepare = flow_verbs_prepare,
1657         .translate = flow_verbs_translate,
1658         .apply = flow_verbs_apply,
1659         .remove = flow_verbs_remove,
1660         .destroy = flow_verbs_destroy,
1661 };
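/*
 * Editor's note -- call-order sketch only.  Once the generic mlx5 flow
 * layer has validated a rule, it is expected to drive this ops table
 * roughly as follows (variable names are illustrative; the generic layer
 * presumably links dev_flow into flow->dev_flows before apply):
 *
 *      const struct mlx5_flow_driver_ops *ops = &mlx5_flow_verbs_drv_ops;
 *      uint64_t item_flags = 0, action_flags = 0;
 *
 *      dev_flow = ops->prepare(attr, items, actions,
 *                              &item_flags, &action_flags, error);
 *      ops->translate(dev, dev_flow, attr, items, actions, error);
 *      ops->apply(dev, flow, error);
 *      // rte_flow_destroy() path: release HW state and free the memory
 *      ops->destroy(dev, flow);
 */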