1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
36 /**
37  * Get a flow counter.
38  *
39  * @param[in] dev
40  *   Pointer to the Ethernet device structure.
41  * @param[in] shared
42  *   Indicate if this counter is shared with other flows.
43  * @param[in] id
44  *   Counter identifier.
45  *
46  * @return
47  *   A pointer to the counter, NULL otherwise and rte_errno is set.
48  */
49 static struct mlx5_flow_counter *
50 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
51 {
52         struct priv *priv = dev->data->dev_private;
53         struct mlx5_flow_counter *cnt;
54
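        /* Reuse an existing counter only when both it and the request are
         * shared and the identifiers match; otherwise create a new one. */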
55         LIST_FOREACH(cnt, &priv->flow_counters, next) {
56                 if (!cnt->shared || cnt->shared != shared)
57                         continue;
58                 if (cnt->id != id)
59                         continue;
60                 cnt->ref_cnt++;
61                 return cnt;
62         }
63 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
64
65         struct mlx5_flow_counter tmpl = {
66                 .shared = shared,
67                 .id = id,
68                 .cs = mlx5_glue->create_counter_set
69                         (priv->ctx,
70                          &(struct ibv_counter_set_init_attr){
71                                  .counter_set_id = id,
72                          }),
73                 .hits = 0,
74                 .bytes = 0,
75                 .ref_cnt = 1,
76         };
77
78         if (!tmpl.cs) {
79                 rte_errno = errno;
80                 return NULL;
81         }
82         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
83         if (!cnt) {
84                 claim_zero(mlx5_glue->destroy_counter_set(tmpl.cs));
85                 rte_errno = ENOMEM;
86                 return NULL;
87         }
88         *cnt = tmpl;
89         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
90         return cnt;
91 #endif
92         rte_errno = ENOTSUP;
93         return NULL;
94 }
95
96 /**
97  * Release a flow counter.
98  *
99  * @param[in] counter
100  *   Pointer to the counter handle.
101  */
102 static void
103 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
104 {
105         if (--counter->ref_cnt == 0) {
106                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
107                 LIST_REMOVE(counter, next);
108                 rte_free(counter);
109         }
110 }
111
112 /**
113  * Add a verbs item specification into @p flow.
114  *
115  * @param[in, out] flow
116  *   Pointer to flow structure.
117  * @param[in] src
118  *   Verbs specification to copy.
119  * @param[in] size
120  *   Size in bytes of the specification to copy.
121  */
122 static void
123 flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
124 {
125         struct mlx5_flow_verbs *verbs = &flow->verbs;
126
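        /* Specifications are stored back to back in the buffer that follows
         * the ibv_flow_attr header; copy the new spec at the current offset
         * and account for its size (size is accumulated even when no buffer
         * is attached). */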
127         if (verbs->specs) {
128                 void *dst;
129
130                 dst = (void *)(verbs->specs + verbs->size);
131                 memcpy(dst, src, size);
132                 ++verbs->attr->num_of_specs;
133         }
134         verbs->size += size;
135 }
136
137 /**
138  * Convert the @p item into a Verbs specification. This function assumes that
139  * the input is valid and that there is space to insert the requested item
140  * into the flow.
141  *
142  * @param[in] item
143  *   Item specification.
144  * @param[in, out] item_flags
145  *   Bit field with all detected items.
146  * @param[in, out] dev_flow
147  *   Pointer to dev_flow structure.
148  */
149 static void
150 flow_verbs_translate_item_eth(const struct rte_flow_item *item,
151                               uint64_t *item_flags,
152                               struct mlx5_flow *dev_flow)
153 {
154         const struct rte_flow_item_eth *spec = item->spec;
155         const struct rte_flow_item_eth *mask = item->mask;
156         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
157         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
158         struct ibv_flow_spec_eth eth = {
159                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
160                 .size = size,
161         };
162
163         if (!mask)
164                 mask = &rte_flow_item_eth_mask;
165         if (spec) {
166                 unsigned int i;
167
168                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
169                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
170                 eth.val.ether_type = spec->type;
171                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
172                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
173                 eth.mask.ether_type = mask->type;
174                 /* Remove unwanted bits from values. */
175                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
176                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
177                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
178                 }
179                 eth.val.ether_type &= eth.mask.ether_type;
180                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
181         }
182         flow_verbs_spec_add(dev_flow, &eth, size);
183         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
184                                 MLX5_FLOW_LAYER_OUTER_L2;
185 }
186
187 /**
188  * Update the VLAN tag in the Verbs Ethernet specification.
189  * This function assumes that the input is valid and there is space to add
190  * the requested item.
191  *
192  * @param[in, out] attr
193  *   Pointer to Verbs attributes structure.
194  * @param[in] eth
195  *   Verbs structure containing the VLAN information to copy.
196  */
197 static void
198 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
199                             struct ibv_flow_spec_eth *eth)
200 {
201         unsigned int i;
202         const enum ibv_flow_spec_type search = eth->type;
203         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
204                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
205
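        /* Scan the specifications already attached to the attribute and merge
         * the VLAN tag and EtherType into the matching Ethernet spec. */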
206         for (i = 0; i != attr->num_of_specs; ++i) {
207                 if (hdr->type == search) {
208                         struct ibv_flow_spec_eth *e =
209                                 (struct ibv_flow_spec_eth *)hdr;
210
211                         e->val.vlan_tag = eth->val.vlan_tag;
212                         e->mask.vlan_tag = eth->mask.vlan_tag;
213                         e->val.ether_type = eth->val.ether_type;
214                         e->mask.ether_type = eth->mask.ether_type;
215                         break;
216                 }
217                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
218         }
219 }
220
221 /**
222  * Convert the @p item into a Verbs specification. This function assumes that
223  * the input is valid and that there is space to insert the requested item
224  * into the flow.
225  *
226  * @param[in] item
227  *   Item specification.
228  * @param[in, out] item_flags
229  *   Bit mask that holds all detected items.
230  * @param[in, out] dev_flow
231  *   Pointer to dev_flow structure.
232  */
233 static void
234 flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
235                                uint64_t *item_flags,
236                                struct mlx5_flow *dev_flow)
237 {
238         const struct rte_flow_item_vlan *spec = item->spec;
239         const struct rte_flow_item_vlan *mask = item->mask;
240         unsigned int size = sizeof(struct ibv_flow_spec_eth);
241         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
242         struct ibv_flow_spec_eth eth = {
243                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
244                 .size = size,
245         };
246         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
247                                       MLX5_FLOW_LAYER_OUTER_L2;
248
249         if (!mask)
250                 mask = &rte_flow_item_vlan_mask;
251         if (spec) {
252                 eth.val.vlan_tag = spec->tci;
253                 eth.mask.vlan_tag = mask->tci;
254                 eth.val.vlan_tag &= eth.mask.vlan_tag;
255                 eth.val.ether_type = spec->inner_type;
256                 eth.mask.ether_type = mask->inner_type;
257                 eth.val.ether_type &= eth.mask.ether_type;
258         }
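        /* If no L2 spec was added yet, add an Ethernet spec carrying the VLAN
         * fields; otherwise update the Ethernet spec already in place. */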
259         if (!(*item_flags & l2m)) {
260                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
261                 flow_verbs_spec_add(dev_flow, &eth, size);
262         } else {
263                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
264                 size = 0; /* Only an update is done in eth specification. */
265         }
266         *item_flags |= tunnel ?
267                        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
268                        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
269 }
270
271 /**
272  * Convert the @p item into a Verbs specification. This function assumes that
273  * the input is valid and that there is space to insert the requested item
274  * into the flow.
275  *
276  * @param[in] item
277  *   Item specification.
278  * @param[in, out] item_flags
279  *   Bit mask that marks all detected items.
280  * @param[in, out] dev_flow
281  *   Pointer to dev_flow structure.
282  */
283 static void
284 flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
285                                uint64_t *item_flags,
286                                struct mlx5_flow *dev_flow)
287 {
288         const struct rte_flow_item_ipv4 *spec = item->spec;
289         const struct rte_flow_item_ipv4 *mask = item->mask;
290         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
291         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
292         struct ibv_flow_spec_ipv4_ext ipv4 = {
293                 .type = IBV_FLOW_SPEC_IPV4_EXT |
294                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
295                 .size = size,
296         };
297
298         if (!mask)
299                 mask = &rte_flow_item_ipv4_mask;
300         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
301                                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
302         if (spec) {
303                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
304                         .src_ip = spec->hdr.src_addr,
305                         .dst_ip = spec->hdr.dst_addr,
306                         .proto = spec->hdr.next_proto_id,
307                         .tos = spec->hdr.type_of_service,
308                 };
309                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
310                         .src_ip = mask->hdr.src_addr,
311                         .dst_ip = mask->hdr.dst_addr,
312                         .proto = mask->hdr.next_proto_id,
313                         .tos = mask->hdr.type_of_service,
314                 };
315                 /* Remove unwanted bits from values. */
316                 ipv4.val.src_ip &= ipv4.mask.src_ip;
317                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
318                 ipv4.val.proto &= ipv4.mask.proto;
319                 ipv4.val.tos &= ipv4.mask.tos;
320         }
321         dev_flow->verbs.hash_fields |=
322                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
323                                             MLX5_IPV4_LAYER_TYPES,
324                                             MLX5_IPV4_IBV_RX_HASH);
325         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
326         flow_verbs_spec_add(dev_flow, &ipv4, size);
327 }
328
329 /**
330  * Convert the @p item into a Verbs specification. This function assumes that
331  * the input is valid and that there is space to insert the requested item
332  * into the flow.
333  *
334  * @param[in] item
335  *   Item specification.
336  * @param[in, out] item_flags
337  *   Bit mask that marks all detected items.
338  * @param[in, out] dev_flow
339  *   Pointer to dev_flow structure.
340  */
341 static void
342 flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
343                                uint64_t *item_flags,
344                                struct mlx5_flow *dev_flow)
345 {
346         const struct rte_flow_item_ipv6 *spec = item->spec;
347         const struct rte_flow_item_ipv6 *mask = item->mask;
348         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
349         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
350         struct ibv_flow_spec_ipv6 ipv6 = {
351                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
352                 .size = size,
353         };
354
355         if (!mask)
356                 mask = &rte_flow_item_ipv6_mask;
357         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
358                                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
359         if (spec) {
360                 unsigned int i;
361                 uint32_t vtc_flow_val;
362                 uint32_t vtc_flow_mask;
363
364                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
365                        RTE_DIM(ipv6.val.src_ip));
366                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
367                        RTE_DIM(ipv6.val.dst_ip));
368                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
369                        RTE_DIM(ipv6.mask.src_ip));
370                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
371                        RTE_DIM(ipv6.mask.dst_ip));
372                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
373                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
374                 ipv6.val.flow_label =
375                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
376                                          IPV6_HDR_FL_SHIFT);
377                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
378                                          IPV6_HDR_TC_SHIFT;
379                 ipv6.val.next_hdr = spec->hdr.proto;
380                 ipv6.val.hop_limit = spec->hdr.hop_limits;
381                 ipv6.mask.flow_label =
382                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
383                                          IPV6_HDR_FL_SHIFT);
384                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
385                                           IPV6_HDR_TC_SHIFT;
386                 ipv6.mask.next_hdr = mask->hdr.proto;
387                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
388                 /* Remove unwanted bits from values. */
389                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
390                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
391                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
392                 }
393                 ipv6.val.flow_label &= ipv6.mask.flow_label;
394                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
395                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
396                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
397         }
398         dev_flow->verbs.hash_fields |=
399                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
400                                             MLX5_IPV6_LAYER_TYPES,
401                                             MLX5_IPV6_IBV_RX_HASH);
402         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
403         flow_verbs_spec_add(dev_flow, &ipv6, size);
404 }
405
406 /**
407  * Convert the @p item into a Verbs specification. This function assumes that
408  * the input is valid and that there is space to insert the requested item
409  * into the flow.
410  *
411  * @param[in] item
412  *   Item specification.
413  * @param[in, out] item_flags
414  *   Bit mask that marks all detected items.
415  * @param[in, out] dev_flow
416  *   Pointer to dev_flow structure.
417  */
418 static void
419 flow_verbs_translate_item_udp(const struct rte_flow_item *item,
420                               uint64_t *item_flags,
421                               struct mlx5_flow *dev_flow)
422 {
423         const struct rte_flow_item_udp *spec = item->spec;
424         const struct rte_flow_item_udp *mask = item->mask;
425         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
426         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
427         struct ibv_flow_spec_tcp_udp udp = {
428                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
429                 .size = size,
430         };
431
432         if (!mask)
433                 mask = &rte_flow_item_udp_mask;
434         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
435                                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
436         if (spec) {
437                 udp.val.dst_port = spec->hdr.dst_port;
438                 udp.val.src_port = spec->hdr.src_port;
439                 udp.mask.dst_port = mask->hdr.dst_port;
440                 udp.mask.src_port = mask->hdr.src_port;
441                 /* Remove unwanted bits from values. */
442                 udp.val.src_port &= udp.mask.src_port;
443                 udp.val.dst_port &= udp.mask.dst_port;
444         }
445         dev_flow->verbs.hash_fields |=
446                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
447                                             (IBV_RX_HASH_SRC_PORT_UDP |
448                                              IBV_RX_HASH_DST_PORT_UDP));
449         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
450         flow_verbs_spec_add(dev_flow, &udp, size);
451 }
452
453 /**
454  * Convert the @p item into a Verbs specification. This function assumes that
455  * the input is valid and that there is space to insert the requested item
456  * into the flow.
457  *
458  * @param[in] item
459  *   Item specification.
460  * @param[in, out] item_flags
461  *   Bit mask that marks all detected items.
462  * @param[in, out] dev_flow
463  *   Pointer to dev_flow structure.
464  */
465 static void
466 flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
467                               uint64_t *item_flags,
468                               struct mlx5_flow *dev_flow)
469 {
470         const struct rte_flow_item_tcp *spec = item->spec;
471         const struct rte_flow_item_tcp *mask = item->mask;
472         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
473         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
474         struct ibv_flow_spec_tcp_udp tcp = {
475                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
476                 .size = size,
477         };
478
479         if (!mask)
480                 mask = &rte_flow_item_tcp_mask;
481         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
482                                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
483         if (spec) {
484                 tcp.val.dst_port = spec->hdr.dst_port;
485                 tcp.val.src_port = spec->hdr.src_port;
486                 tcp.mask.dst_port = mask->hdr.dst_port;
487                 tcp.mask.src_port = mask->hdr.src_port;
488                 /* Remove unwanted bits from values. */
489                 tcp.val.src_port &= tcp.mask.src_port;
490                 tcp.val.dst_port &= tcp.mask.dst_port;
491         }
492         dev_flow->verbs.hash_fields |=
493                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
494                                             (IBV_RX_HASH_SRC_PORT_TCP |
495                                              IBV_RX_HASH_DST_PORT_TCP));
496         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
497         flow_verbs_spec_add(dev_flow, &tcp, size);
498 }
499
500 /**
501  * Convert the @p item into a Verbs specification. This function assumes that
502  * the input is valid and that there is space to insert the requested item
503  * into the flow.
504  *
505  * @param[in] item
506  *   Item specification.
507  * @param[in, out] item_flags
508  *   Bit mask that marks all detected items.
509  * @param[in, out] dev_flow
510  *   Pointer to dev_flow structure.
511  */
512 static void
513 flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
514                                 uint64_t *item_flags,
515                                 struct mlx5_flow *dev_flow)
516 {
517         const struct rte_flow_item_vxlan *spec = item->spec;
518         const struct rte_flow_item_vxlan *mask = item->mask;
519         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
520         struct ibv_flow_spec_tunnel vxlan = {
521                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
522                 .size = size,
523         };
524         union vni {
525                 uint32_t vlan_id;
526                 uint8_t vni[4];
527         } id = { .vlan_id = 0, };
528
529         if (!mask)
530                 mask = &rte_flow_item_vxlan_mask;
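        /* Copy the 24-bit VNI into bytes 1..3 of the 32-bit tunnel_id, for
         * both the value and the mask. */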
531         if (spec) {
532                 memcpy(&id.vni[1], spec->vni, 3);
533                 vxlan.val.tunnel_id = id.vlan_id;
534                 memcpy(&id.vni[1], mask->vni, 3);
535                 vxlan.mask.tunnel_id = id.vlan_id;
536                 /* Remove unwanted bits from values. */
537                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
538         }
539         flow_verbs_spec_add(dev_flow, &vxlan, size);
540         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
541         *item_flags |= MLX5_FLOW_LAYER_VXLAN;
542 }
543
544 /**
545  * Convert the @p item into a Verbs specification. This function assumes that
546  * the input is valid and that there is space to insert the requested item
547  * into the flow.
548  *
549  * @param[in] item
550  *   Item specification.
551  * @param[in, out] item_flags
552  *   Bit mask that marks all detected items.
553  * @param[in, out] dev_flow
554  *   Pointer to dev_flow structure.
555  */
556 static void
557 flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
558                                     uint64_t *item_flags,
559                                     struct mlx5_flow *dev_flow)
560 {
561         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
562         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
563         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
564         struct ibv_flow_spec_tunnel vxlan_gpe = {
565                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
566                 .size = size,
567         };
568         union vni {
569                 uint32_t vlan_id;
570                 uint8_t vni[4];
571         } id = { .vlan_id = 0, };
572
573         if (!mask)
574                 mask = &rte_flow_item_vxlan_gpe_mask;
575         if (spec) {
576                 memcpy(&id.vni[1], spec->vni, 3);
577                 vxlan_gpe.val.tunnel_id = id.vlan_id;
578                 memcpy(&id.vni[1], mask->vni, 3);
579                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
580                 /* Remove unwanted bits from values. */
581                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
582         }
583         flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
584         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
585         *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
586 }
587
588 /**
589  * Update the protocol in Verbs IPv4/IPv6 spec.
590  *
591  * @param[in, out] attr
592  *   Pointer to Verbs attributes structure.
593  * @param[in] search
594  *   Specification type to search in order to update the IP protocol.
595  * @param[in] protocol
596  *   Protocol value to set if none is present in the specification.
597  */
598 static void
599 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
600                                        enum ibv_flow_spec_type search,
601                                        uint8_t protocol)
602 {
603         unsigned int i;
604         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
605                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
606
607         if (!attr)
608                 return;
609         for (i = 0; i != attr->num_of_specs; ++i) {
610                 if (hdr->type == search) {
611                         union {
612                                 struct ibv_flow_spec_ipv4_ext *ipv4;
613                                 struct ibv_flow_spec_ipv6 *ipv6;
614                         } ip;
615
616                         switch (search) {
617                         case IBV_FLOW_SPEC_IPV4_EXT:
618                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
619                                 if (!ip.ipv4->val.proto) {
620                                         ip.ipv4->val.proto = protocol;
621                                         ip.ipv4->mask.proto = 0xff;
622                                 }
623                                 break;
624                         case IBV_FLOW_SPEC_IPV6:
625                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
626                                 if (!ip.ipv6->val.next_hdr) {
627                                         ip.ipv6->val.next_hdr = protocol;
628                                         ip.ipv6->mask.next_hdr = 0xff;
629                                 }
630                                 break;
631                         default:
632                                 break;
633                         }
634                         break;
635                 }
636                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
637         }
638 }
639
640 /**
641  * Convert the @p item into a Verbs specification. This function assumes that
642  * the input is valid and that there is space to insert the requested item
643  * into the flow.
644  *
645  * @param[in] item
646  *   Item specification.
647  * @param[in, out] item_flags
648  *   Bit mask that marks all detected items.
649  * @param[in, out] dev_flow
650  *   Pointer to dev_flow structure.
651  */
652 static void
653 flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
654                               uint64_t *item_flags,
655                               struct mlx5_flow *dev_flow)
656 {
657         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
658 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
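        /* Without HAVE_IBV_DEVICE_MPLS_SUPPORT the dedicated GRE spec type is
         * not available, so GRE is matched as a generic tunnel. */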
659         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
660         struct ibv_flow_spec_tunnel tunnel = {
661                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
662                 .size = size,
663         };
664 #else
665         const struct rte_flow_item_gre *spec = item->spec;
666         const struct rte_flow_item_gre *mask = item->mask;
667         unsigned int size = sizeof(struct ibv_flow_spec_gre);
668         struct ibv_flow_spec_gre tunnel = {
669                 .type = IBV_FLOW_SPEC_GRE,
670                 .size = size,
671         };
672
673         if (!mask)
674                 mask = &rte_flow_item_gre_mask;
675         if (spec) {
676                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
677                 tunnel.val.protocol = spec->protocol;
678                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
679                 tunnel.mask.protocol = mask->protocol;
680                 /* Remove unwanted bits from values. */
681                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
682                 tunnel.val.protocol &= tunnel.mask.protocol;
683                 tunnel.val.key &= tunnel.mask.key;
684         }
685 #endif
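        /* GRE implies IP protocol 47 (IPPROTO_GRE); if the preceding
         * IPv4/IPv6 spec left the protocol unmatched, set it so the rule
         * only matches GRE packets. */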
686         if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
687                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
688                                                        IBV_FLOW_SPEC_IPV4_EXT,
689                                                        IPPROTO_GRE);
690         else
691                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
692                                                        IBV_FLOW_SPEC_IPV6,
693                                                        IPPROTO_GRE);
694         flow_verbs_spec_add(dev_flow, &tunnel, size);
695         verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
696         *item_flags |= MLX5_FLOW_LAYER_GRE;
697 }
698
699 /**
700  * Convert the @p item into a Verbs specification. This function assumes that
701  * the input is valid and that there is space to insert the requested item
702  * into the flow.
703  *
704  * @param[in] item
705  *   Item specification.
706  * @param[in, out] action_flags
707  *   Bit mask that marks all detected items.
708  * @param[in, out] dev_flow
709  *   Pointer to dev_flow structure.
710  */
711 static void
712 flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
713                                uint64_t *action_flags __rte_unused,
714                                struct mlx5_flow *dev_flow __rte_unused)
715 {
716 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
717         const struct rte_flow_item_mpls *spec = item->spec;
718         const struct rte_flow_item_mpls *mask = item->mask;
719         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
720         struct ibv_flow_spec_mpls mpls = {
721                 .type = IBV_FLOW_SPEC_MPLS,
722                 .size = size,
723         };
724
725         if (!mask)
726                 mask = &rte_flow_item_mpls_mask;
727         if (spec) {
728                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
729                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
730                 /* Remove unwanted bits from values.  */
731                 mpls.val.label &= mpls.mask.label;
732         }
733         flow_verbs_spec_add(dev_flow, &mpls, size);
734         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
735         *action_flags |= MLX5_FLOW_LAYER_MPLS;
736 #endif
737 }
738
739 /**
740  * Convert the @p action into a Verbs specification. This function assumes that
741  * the input is valid and that there is space to insert the requested action
742  * into the flow.
743  *
744  * @param[in, out] action_flags
745  *   Pointer to the detected actions.
746  * @param[in] dev_flow
747  *   Pointer to mlx5_flow.
748  */
749 static void
750 flow_verbs_translate_action_drop(uint64_t *action_flags,
751                                  struct mlx5_flow *dev_flow)
752 {
753         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
754         struct ibv_flow_spec_action_drop drop = {
755                         .type = IBV_FLOW_SPEC_ACTION_DROP,
756                         .size = size,
757         };
758
759         flow_verbs_spec_add(dev_flow, &drop, size);
760         *action_flags |= MLX5_FLOW_ACTION_DROP;
761 }
762
763 /**
764  * Convert the @p action into a Verbs specification. This function assumes that
765  * the input is valid and that there is space to insert the requested action
766  * into the flow.
767  *
768  * @param[in] action
769  *   Action configuration.
770  * @param[in, out] action_flags
771  *   Pointer to the detected actions.
772  * @param[in] dev_flow
773  *   Pointer to mlx5_flow.
774  */
775 static void
776 flow_verbs_translate_action_queue(const struct rte_flow_action *action,
777                                   uint64_t *action_flags,
778                                   struct mlx5_flow *dev_flow)
779 {
780         const struct rte_flow_action_queue *queue = action->conf;
781         struct rte_flow *flow = dev_flow->flow;
782
783         if (flow->queue)
784                 (*flow->queue)[0] = queue->index;
785         flow->rss.queue_num = 1;
786         *action_flags |= MLX5_FLOW_ACTION_QUEUE;
787 }
788
789 /**
790  * Convert the @p action into a Verbs specification. This function assumes that
791  * the input is valid and that there is space to insert the requested action
792  * into the flow.
793  *
794  * @param[in] action
795  *   Action configuration.
796  * @param[in, out] action_flags
797  *   Pointer to the detected actions.
798  * @param[in] dev_flow
799  *   Pointer to mlx5_flow.
800  */
801 static void
802 flow_verbs_translate_action_rss(const struct rte_flow_action *action,
803                                 uint64_t *action_flags,
804                                 struct mlx5_flow *dev_flow)
805 {
806         const struct rte_flow_action_rss *rss = action->conf;
807         struct rte_flow *flow = dev_flow->flow;
808
809         if (flow->queue)
810                 memcpy((*flow->queue), rss->queue,
811                        rss->queue_num * sizeof(uint16_t));
812         flow->rss.queue_num = rss->queue_num;
813         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
814         flow->rss.types = rss->types;
815         flow->rss.level = rss->level;
816         *action_flags |= MLX5_FLOW_ACTION_RSS;
817 }
818
819 /**
820  * Convert the @p action into a Verbs specification. This function assumes that
821  * the input is valid and that there is space to insert the requested action
822  * into the flow.
823  *
824  * @param[in] action
825  *   Action configuration.
826  * @param[in, out] action_flags
827  *   Pointer to the detected actions.
828  * @param[in] dev_flow
829  *   Pointer to mlx5_flow.
830  */
831 static void
832 flow_verbs_translate_action_flag
833                         (const struct rte_flow_action *action __rte_unused,
834                          uint64_t *action_flags,
835                          struct mlx5_flow *dev_flow)
836 {
837         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
838         struct ibv_flow_spec_action_tag tag = {
839                 .type = IBV_FLOW_SPEC_ACTION_TAG,
840                 .size = size,
841                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
842         };
843         *action_flags |= MLX5_FLOW_ACTION_MARK;
844         flow_verbs_spec_add(dev_flow, &tag, size);
845 }
846
847 /**
848  * Update verbs specification to modify the flag to mark.
849  *
850  * @param[in, out] verbs
851  *   Pointer to the mlx5_flow_verbs structure.
852  * @param[in] mark_id
853  *   Mark identifier to replace the flag.
854  */
855 static void
856 flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
857 {
858         struct ibv_spec_header *hdr;
859         int i;
860
861         if (!verbs)
862                 return;
863         /* Update Verbs specification. */
864         hdr = (struct ibv_spec_header *)verbs->specs;
865         if (!hdr)
866                 return;
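        /* Walk the attached specifications and rewrite the tag of any
         * existing FLAG/MARK action with the requested mark value. */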
867         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
868                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
869                         struct ibv_flow_spec_action_tag *t =
870                                 (struct ibv_flow_spec_action_tag *)hdr;
871
872                         t->tag_id = mlx5_flow_mark_set(mark_id);
873                 }
874                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
875         }
876 }
877
878 /**
879  * Convert the @p action into a Verbs specification. This function assumes that
880  * the input is valid and that there is space to insert the requested action
881  * into the flow. This function also return the action that was added.
882  *
883  * @param[in] action
884  *   Action configuration.
885  * @param[in, out] action_flags
886  *   Pointer to the detected actions.
887  * @param[in] dev_flow
888  *   Pointer to mlx5_flow.
889  */
890 static void
891 flow_verbs_translate_action_mark(const struct rte_flow_action *action,
892                                  uint64_t *action_flags,
893                                  struct mlx5_flow *dev_flow)
894 {
895         const struct rte_flow_action_mark *mark = action->conf;
896         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
897         struct ibv_flow_spec_action_tag tag = {
898                 .type = IBV_FLOW_SPEC_ACTION_TAG,
899                 .size = size,
900         };
901         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
902
903         if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
904                 flow_verbs_mark_update(verbs, mark->id);
905                 size = 0;
906         } else {
907                 tag.tag_id = mlx5_flow_mark_set(mark->id);
908                 flow_verbs_spec_add(dev_flow, &tag, size);
909         }
910         *action_flags |= MLX5_FLOW_ACTION_MARK;
911 }
912
913 /**
914  * Convert the @p action into a Verbs specification. This function assumes that
915  * the input is valid and that there is space to insert the requested action
916  * into the flow.
917  *
918  * @param[in] dev
919  *   Pointer to the Ethernet device structure.
920  * @param[in] action
921  *   Action configuration.
922  * @param[in, out] action_flags
923  *   Pointer to the detected actions.
924  * @param[in] dev_flow
925  *   Pointer to mlx5_flow.
926  * @param[out] error
927  *   Pointer to error structure.
928  *
929  * @return
930  *   0 on success, a negative errno value otherwise and rte_errno is set.
931  */
932 static int
933 flow_verbs_translate_action_count(struct rte_eth_dev *dev,
934                                   const struct rte_flow_action *action,
935                                   uint64_t *action_flags,
936                                   struct mlx5_flow *dev_flow,
937                                   struct rte_flow_error *error)
938 {
939         const struct rte_flow_action_count *count = action->conf;
940         struct rte_flow *flow = dev_flow->flow;
941 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
942         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
943         struct ibv_flow_spec_counter_action counter = {
944                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
945                 .size = size,
946         };
947 #endif
948
949         if (!flow->counter) {
950                 flow->counter = flow_verbs_counter_new(dev, count->shared,
951                                                        count->id);
952                 if (!flow->counter)
953                         return rte_flow_error_set(error, rte_errno,
954                                                   RTE_FLOW_ERROR_TYPE_ACTION,
955                                                   action,
956                                                   "cannot get counter"
957                                                   " context.");
958         }
959         *action_flags |= MLX5_FLOW_ACTION_COUNT;
960 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
961         counter.counter_set_handle = flow->counter->cs->handle;
962         flow_verbs_spec_add(dev_flow, &counter, size);
963 #endif
964         return 0;
965 }
966
967 /**
968  * Internal validation function. For validating both actions and items.
969  *
970  * @param[in] dev
971  *   Pointer to the Ethernet device structure.
972  * @param[in] attr
973  *   Pointer to the flow attributes.
974  * @param[in] items
975  *   Pointer to the list of items.
976  * @param[in] actions
977  *   Pointer to the list of actions.
978  * @param[out] error
979  *   Pointer to the error structure.
980  *
981  * @return
982  *   0 on success, a negative errno value otherwise and rte_errno is set.
983  */
984 static int
985 flow_verbs_validate(struct rte_eth_dev *dev,
986                     const struct rte_flow_attr *attr,
987                     const struct rte_flow_item items[],
988                     const struct rte_flow_action actions[],
989                     struct rte_flow_error *error)
990 {
991         int ret;
992         uint32_t action_flags = 0;
993         uint32_t item_flags = 0;
994         int tunnel = 0;
995         uint8_t next_protocol = 0xff;
996
997         if (items == NULL)
998                 return -1;
999         ret = mlx5_flow_validate_attributes(dev, attr, error);
1000         if (ret < 0)
1001                 return ret;
1002         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1003                 int ret = 0;
1004                 switch (items->type) {
1005                 case RTE_FLOW_ITEM_TYPE_VOID:
1006                         break;
1007                 case RTE_FLOW_ITEM_TYPE_ETH:
1008                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1009                                                           error);
1010                         if (ret < 0)
1011                                 return ret;
1012                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1013                                                MLX5_FLOW_LAYER_OUTER_L2;
1014                         break;
1015                 case RTE_FLOW_ITEM_TYPE_VLAN:
1016                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1017                                                            error);
1018                         if (ret < 0)
1019                                 return ret;
1020                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1021                                                MLX5_FLOW_LAYER_OUTER_VLAN;
1022                         break;
1023                 case RTE_FLOW_ITEM_TYPE_IPV4:
1024                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1025                                                            error);
1026                         if (ret < 0)
1027                                 return ret;
1028                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1029                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1030                         if (items->mask != NULL &&
1031                             ((const struct rte_flow_item_ipv4 *)
1032                              items->mask)->hdr.next_proto_id)
1033                                 next_protocol =
1034                                         ((const struct rte_flow_item_ipv4 *)
1035                                          (items->spec))->hdr.next_proto_id;
1036                         break;
1037                 case RTE_FLOW_ITEM_TYPE_IPV6:
1038                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1039                                                            error);
1040                         if (ret < 0)
1041                                 return ret;
1042                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1043                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1044                         if (items->mask != NULL &&
1045                             ((const struct rte_flow_item_ipv6 *)
1046                              items->mask)->hdr.proto)
1047                                 next_protocol =
1048                                         ((const struct rte_flow_item_ipv6 *)
1049                                          items->spec)->hdr.proto;
1050                         break;
1051                 case RTE_FLOW_ITEM_TYPE_UDP:
1052                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1053                                                           next_protocol,
1054                                                           error);
1055                         if (ret < 0)
1056                                 return ret;
1057                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1058                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1059                         break;
1060                 case RTE_FLOW_ITEM_TYPE_TCP:
1061                         ret = mlx5_flow_validate_item_tcp
1062                                                 (items, item_flags,
1063                                                  next_protocol,
1064                                                  &rte_flow_item_tcp_mask,
1065                                                  error);
1066                         if (ret < 0)
1067                                 return ret;
1068                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1069                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1070                         break;
1071                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1072                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1073                                                             error);
1074                         if (ret < 0)
1075                                 return ret;
1076                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1077                         break;
1078                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1079                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1080                                                                 item_flags,
1081                                                                 dev, error);
1082                         if (ret < 0)
1083                                 return ret;
1084                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1085                         break;
1086                 case RTE_FLOW_ITEM_TYPE_GRE:
1087                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1088                                                           next_protocol, error);
1089                         if (ret < 0)
1090                                 return ret;
1091                         item_flags |= MLX5_FLOW_LAYER_GRE;
1092                         break;
1093                 case RTE_FLOW_ITEM_TYPE_MPLS:
1094                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
1095                                                            next_protocol,
1096                                                            error);
1097                         if (ret < 0)
1098                                 return ret;
1099                         if (next_protocol != 0xff &&
1100                             next_protocol != IPPROTO_MPLS)
1101                                 return rte_flow_error_set
1102                                         (error, EINVAL,
1103                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1104                                          "protocol filtering not compatible"
1105                                          " with MPLS layer");
1106                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1107                         break;
1108                 default:
1109                         return rte_flow_error_set(error, ENOTSUP,
1110                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1111                                                   NULL, "item not supported");
1112                 }
1113         }
1114         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1115                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1116                 switch (actions->type) {
1117                 case RTE_FLOW_ACTION_TYPE_VOID:
1118                         break;
1119                 case RTE_FLOW_ACTION_TYPE_FLAG:
1120                         ret = mlx5_flow_validate_action_flag(action_flags,
1121                                                              attr,
1122                                                              error);
1123                         if (ret < 0)
1124                                 return ret;
1125                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1126                         break;
1127                 case RTE_FLOW_ACTION_TYPE_MARK:
1128                         ret = mlx5_flow_validate_action_mark(actions,
1129                                                              action_flags,
1130                                                              attr,
1131                                                              error);
1132                         if (ret < 0)
1133                                 return ret;
1134                         action_flags |= MLX5_FLOW_ACTION_MARK;
1135                         break;
1136                 case RTE_FLOW_ACTION_TYPE_DROP:
1137                         ret = mlx5_flow_validate_action_drop(action_flags,
1138                                                              attr,
1139                                                              error);
1140                         if (ret < 0)
1141                                 return ret;
1142                         action_flags |= MLX5_FLOW_ACTION_DROP;
1143                         break;
1144                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1145                         ret = mlx5_flow_validate_action_queue(actions,
1146                                                               action_flags, dev,
1147                                                               attr,
1148                                                               error);
1149                         if (ret < 0)
1150                                 return ret;
1151                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1152                         break;
1153                 case RTE_FLOW_ACTION_TYPE_RSS:
1154                         ret = mlx5_flow_validate_action_rss(actions,
1155                                                             action_flags, dev,
1156                                                             attr,
1157                                                             error);
1158                         if (ret < 0)
1159                                 return ret;
1160                         action_flags |= MLX5_FLOW_ACTION_RSS;
1161                         break;
1162                 case RTE_FLOW_ACTION_TYPE_COUNT:
1163                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1164                         if (ret < 0)
1165                                 return ret;
1166                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1167                         break;
1168                 default:
1169                         return rte_flow_error_set(error, ENOTSUP,
1170                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1171                                                   actions,
1172                                                   "action not supported");
1173                 }
1174         }
1175         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1176                 return rte_flow_error_set(error, EINVAL,
1177                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1178                                           "no fate action is found");
1179         return 0;
1180 }
1181
1182 /**
1183  * Calculate the required bytes that are needed for the action part of the verbs
1184  * flow, in addtion returns bit-fields with all the detected action, in order to
1185  * avoid another interation over the actions.
1186  *
1187  * @param[in] actions
1188  *   Pointer to the list of actions.
1189  * @param[out] action_flags
1190  *   Pointer to the detected actions.
1191  *
1192  * @return
1193  *   The size of the memory needed for all actions.
1194  */
1195 static int
1196 flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
1197                                 uint64_t *action_flags)
1198 {
1199         int size = 0;
1200         uint64_t detected_actions = 0;
1201
1202         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1203                 switch (actions->type) {
1204                 case RTE_FLOW_ACTION_TYPE_VOID:
1205                         break;
1206                 case RTE_FLOW_ACTION_TYPE_FLAG:
1207                         size += sizeof(struct ibv_flow_spec_action_tag);
1208                         detected_actions |= MLX5_FLOW_ACTION_FLAG;
1209                         break;
1210                 case RTE_FLOW_ACTION_TYPE_MARK:
1211                         size += sizeof(struct ibv_flow_spec_action_tag);
1212                         detected_actions |= MLX5_FLOW_ACTION_MARK;
1213                         break;
1214                 case RTE_FLOW_ACTION_TYPE_DROP:
1215                         size += sizeof(struct ibv_flow_spec_action_drop);
1216                         detected_actions |= MLX5_FLOW_ACTION_DROP;
1217                         break;
1218                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1219                         detected_actions |= MLX5_FLOW_ACTION_QUEUE;
1220                         break;
1221                 case RTE_FLOW_ACTION_TYPE_RSS:
1222                         detected_actions |= MLX5_FLOW_ACTION_RSS;
1223                         break;
1224                 case RTE_FLOW_ACTION_TYPE_COUNT:
1225 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
1226                         size += sizeof(struct ibv_flow_spec_counter_action);
1227 #endif
1228                         detected_actions |= MLX5_FLOW_ACTION_COUNT;
1229                         break;
1230                 default:
1231                         break;
1232                 }
1233         }
1234         *action_flags = detected_actions;
1235         return size;
1236 }
1237
1238 /**
1239  * Calculate the required bytes that are needed for the item part of the verbs
1240  * flow. In addition, it returns a bit-field with all the detected items, in
1241  * order to avoid another iteration over the items.
1242  *
1243  * @param[in] items
1244  *   Pointer to the list of items.
1245  * @param[in, out] item_flags
1246  *   Pointer to the detected items.
1247  *
1248  * @return
1249  *   The size of the memory needed for all items.
1250  */
1251 static int
1252 flow_verbs_get_items_and_size(const struct rte_flow_item items[],
1253                               uint64_t *item_flags)
1254 {
1255         int size = 0;
1256         uint64_t detected_items = 0;
1257         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
1258
1259         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1260                 switch (items->type) {
1261                 case RTE_FLOW_ITEM_TYPE_VOID:
1262                         break;
1263                 case RTE_FLOW_ITEM_TYPE_ETH:
1264                         size += sizeof(struct ibv_flow_spec_eth);
1265                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1266                                                    MLX5_FLOW_LAYER_OUTER_L2;
1267                         break;
1268                 case RTE_FLOW_ITEM_TYPE_VLAN:
1269                         size += sizeof(struct ibv_flow_spec_eth);
1270                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1271                                                    MLX5_FLOW_LAYER_OUTER_VLAN;
1272                         break;
1273                 case RTE_FLOW_ITEM_TYPE_IPV4:
1274                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1275                         detected_items |= tunnel ?
1276                                           MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1277                                           MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1278                         break;
1279                 case RTE_FLOW_ITEM_TYPE_IPV6:
1280                         size += sizeof(struct ibv_flow_spec_ipv6);
1281                         detected_items |= tunnel ?
1282                                           MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1283                                           MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1284                         break;
1285                 case RTE_FLOW_ITEM_TYPE_UDP:
1286                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1287                         detected_items |= tunnel ?
1288                                           MLX5_FLOW_LAYER_INNER_L4_UDP :
1289                                           MLX5_FLOW_LAYER_OUTER_L4_UDP;
1290                         break;
1291                 case RTE_FLOW_ITEM_TYPE_TCP:
1292                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1293                         detected_items |= tunnel ?
1294                                           MLX5_FLOW_LAYER_INNER_L4_TCP :
1295                                           MLX5_FLOW_LAYER_OUTER_L4_TCP;
1296                         break;
1297                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1298                         size += sizeof(struct ibv_flow_spec_tunnel);
1299                         detected_items |= MLX5_FLOW_LAYER_VXLAN;
1300                         break;
1301                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1302                         size += sizeof(struct ibv_flow_spec_tunnel);
1303                         detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
1304                         break;
1305 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1306                 case RTE_FLOW_ITEM_TYPE_GRE:
1307                         size += sizeof(struct ibv_flow_spec_gre);
1308                         detected_items |= MLX5_FLOW_LAYER_GRE;
1309                         break;
1310                 case RTE_FLOW_ITEM_TYPE_MPLS:
1311                         size += sizeof(struct ibv_flow_spec_mpls);
1312                         detected_items |= MLX5_FLOW_LAYER_MPLS;
1313                         break;
1314 #else
1315                 case RTE_FLOW_ITEM_TYPE_GRE:
1316                         size += sizeof(struct ibv_flow_spec_tunnel);
1317                         detected_items |= MLX5_FLOW_LAYER_TUNNEL;
1318                         break;
1319 #endif
1320                 default:
1321                         break;
1322                 }
1323         }
1324         *item_flags = detected_items;
1325         return size;
1326 }
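/*
 * Sizing example (illustrative only, not used by the driver): an outer
 * ETH / IPV4 / UDP / VXLAN pattern accumulates
 *   sizeof(struct ibv_flow_spec_eth) +
 *   sizeof(struct ibv_flow_spec_ipv4_ext) +
 *   sizeof(struct ibv_flow_spec_tcp_udp) +
 *   sizeof(struct ibv_flow_spec_tunnel)
 * bytes and sets MLX5_FLOW_LAYER_OUTER_L2, MLX5_FLOW_LAYER_OUTER_L3_IPV4,
 * MLX5_FLOW_LAYER_OUTER_L4_UDP and MLX5_FLOW_LAYER_VXLAN in *item_flags.
 */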
1327
1328 /**
1329  * Internal preparation function. Allocate mlx5_flow with the required size.
1330  * The required size is calculated based on the actions and items. This function
1331  * also returns the detected actions and items for later use.
1332  *
1333  * @param[in] attr
1334  *   Pointer to the flow attributes.
1335  * @param[in] items
1336  *   Pointer to the list of items.
1337  * @param[in] actions
1338  *   Pointer to the list of actions.
1339  * @param[out] item_flags
1340  *   Pointer to bit mask of all items detected.
1341  * @param[out] action_flags
1342  *   Pointer to bit mask of all actions detected.
1343  * @param[out] error
1344  *   Pointer to the error structure.
1345  *
1346  * @return
1347  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1348  *   is set.
1349  */
1350 static struct mlx5_flow *
1351 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1352                    const struct rte_flow_item items[],
1353                    const struct rte_flow_action actions[],
1354                    uint64_t *item_flags,
1355                    uint64_t *action_flags,
1356                    struct rte_flow_error *error)
1357 {
1358         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1359         struct mlx5_flow *flow;
1360
1361         size += flow_verbs_get_actions_and_size(actions, action_flags);
1362         size += flow_verbs_get_items_and_size(items, item_flags);
1363         flow = rte_calloc(__func__, 1, size, 0);
1364         if (!flow) {
1365                 rte_flow_error_set(error, ENOMEM,
1366                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1367                                    "not enough memory to create flow");
1368                 return NULL;
1369         }
1370         flow->verbs.attr = (void *)(flow + 1);
1371         flow->verbs.specs =
1372                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1373         return flow;
1374 }
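/*
 * Layout of the single allocation returned by flow_verbs_prepare():
 *
 *   +------------------+----------------------+-------------------------+
 *   | struct mlx5_flow | struct ibv_flow_attr | item/action verbs specs |
 *   +------------------+----------------------+-------------------------+
 *
 * verbs.attr points right after the mlx5_flow structure and verbs.specs
 * right after the ibv_flow_attr header, so translation can append
 * specifications without any further allocation.
 */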
1375
1376 /**
1377  * Fill the flow with verbs specifications.
1378  *
1379  * @param[in] dev
1380  *   Pointer to Ethernet device.
1381  * @param[in, out] dev_flow
1382  *   Pointer to the mlx5 flow.
1383  * @param[in] attr
1384  *   Pointer to the flow attributes.
1385  * @param[in] items
1386  *   Pointer to the list of items.
1387  * @param[in] actions
1388  *   Pointer to the list of actions.
1389  * @param[out] error
1390  *   Pointer to the error structure.
1391  *
1392  * @return
1393  *   0 on success, a negative errno value otherwise and rte_errno is set.
1394  */
1395 static int
1396 flow_verbs_translate(struct rte_eth_dev *dev,
1397                      struct mlx5_flow *dev_flow,
1398                      const struct rte_flow_attr *attr,
1399                      const struct rte_flow_item items[],
1400                      const struct rte_flow_action actions[],
1401                      struct rte_flow_error *error)
1402 {
1403         uint64_t action_flags = 0;
1404         uint64_t item_flags = 0;
1405         uint64_t priority = attr->priority;
1406         struct priv *priv = dev->data->dev_private;
1407
1408         if (priority == MLX5_FLOW_PRIO_RSVD)
1409                 priority = priv->config.flow_prio - 1;
1410         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1411                 int ret;
1412                 switch (actions->type) {
1413                 case RTE_FLOW_ACTION_TYPE_VOID:
1414                         break;
1415                 case RTE_FLOW_ACTION_TYPE_FLAG:
1416                         flow_verbs_translate_action_flag(actions,
1417                                                          &action_flags,
1418                                                          dev_flow);
1419                         break;
1420                 case RTE_FLOW_ACTION_TYPE_MARK:
1421                         flow_verbs_translate_action_mark(actions,
1422                                                          &action_flags,
1423                                                          dev_flow);
1424                         break;
1425                 case RTE_FLOW_ACTION_TYPE_DROP:
1426                         flow_verbs_translate_action_drop(&action_flags,
1427                                                          dev_flow);
1428                         break;
1429                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1430                         flow_verbs_translate_action_queue(actions,
1431                                                           &action_flags,
1432                                                           dev_flow);
1433                         break;
1434                 case RTE_FLOW_ACTION_TYPE_RSS:
1435                         flow_verbs_translate_action_rss(actions,
1436                                                         &action_flags,
1437                                                         dev_flow);
1438                         break;
1439                 case RTE_FLOW_ACTION_TYPE_COUNT:
1440                         ret = flow_verbs_translate_action_count(dev,
1441                                                                 actions,
1442                                                                 &action_flags,
1443                                                                 dev_flow,
1444                                                                 error);
1445                         if (ret < 0)
1446                                 return ret;
1447                         break;
1448                 default:
1449                         return rte_flow_error_set(error, ENOTSUP,
1450                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1451                                                   actions,
1452                                                   "action not supported");
1453                 }
1454         }
1455         dev_flow->flow->actions |= action_flags;
1456         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1457                 switch (items->type) {
1458                 case RTE_FLOW_ITEM_TYPE_VOID:
1459                         break;
1460                 case RTE_FLOW_ITEM_TYPE_ETH:
1461                         flow_verbs_translate_item_eth(items, &item_flags,
1462                                                       dev_flow);
1463                         break;
1464                 case RTE_FLOW_ITEM_TYPE_VLAN:
1465                         flow_verbs_translate_item_vlan(items, &item_flags,
1466                                                        dev_flow);
1467                         break;
1468                 case RTE_FLOW_ITEM_TYPE_IPV4:
1469                         flow_verbs_translate_item_ipv4(items, &item_flags,
1470                                                        dev_flow);
1471                         break;
1472                 case RTE_FLOW_ITEM_TYPE_IPV6:
1473                         flow_verbs_translate_item_ipv6(items, &item_flags,
1474                                                        dev_flow);
1475                         break;
1476                 case RTE_FLOW_ITEM_TYPE_UDP:
1477                         flow_verbs_translate_item_udp(items, &item_flags,
1478                                                       dev_flow);
1479                         break;
1480                 case RTE_FLOW_ITEM_TYPE_TCP:
1481                         flow_verbs_translate_item_tcp(items, &item_flags,
1482                                                       dev_flow);
1483                         break;
1484                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1485                         flow_verbs_translate_item_vxlan(items, &item_flags,
1486                                                         dev_flow);
1487                         break;
1488                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1489                         flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
1490                                                             dev_flow);
1491                         break;
1492                 case RTE_FLOW_ITEM_TYPE_GRE:
1493                         flow_verbs_translate_item_gre(items, &item_flags,
1494                                                       dev_flow);
1495                         break;
1496                 case RTE_FLOW_ITEM_TYPE_MPLS:
1497                         flow_verbs_translate_item_mpls(items, &item_flags,
1498                                                        dev_flow);
1499                         break;
1500                 default:
1501                         return rte_flow_error_set(error, ENOTSUP,
1502                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1503                                                   NULL,
1504                                                   "item not supported");
1505                 }
1506         }
1507         dev_flow->verbs.attr->priority =
1508                 mlx5_flow_adjust_priority(dev, priority,
1509                                           dev_flow->verbs.attr->priority);
1510         return 0;
1511 }
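/*
 * flow_verbs_translate() above works in two passes: actions first (their bits
 * are accumulated into flow->actions), then items, and finally the requested
 * priority is mapped to a device priority. When the application leaves the
 * priority at MLX5_FLOW_PRIO_RSVD, the last configured flow priority
 * (priv->config.flow_prio - 1) is used.
 */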
1512
1513 /**
1514  * Remove the flow from the NIC but keep it in memory.
1515  *
1516  * @param[in] dev
1517  *   Pointer to the Ethernet device structure.
1518  * @param[in, out] flow
1519  *   Pointer to flow structure.
1520  */
1521 static void
1522 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1523 {
1524         struct mlx5_flow_verbs *verbs;
1525         struct mlx5_flow *dev_flow;
1526
1527         if (!flow)
1528                 return;
1529         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1530                 verbs = &dev_flow->verbs;
1531                 if (verbs->flow) {
1532                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1533                         verbs->flow = NULL;
1534                 }
1535                 if (verbs->hrxq) {
1536                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1537                                 mlx5_hrxq_drop_release(dev);
1538                         else
1539                                 mlx5_hrxq_release(dev, verbs->hrxq);
1540                         verbs->hrxq = NULL;
1541                 }
1542         }
1543         if (flow->counter) {
1544                 flow_verbs_counter_release(flow->counter);
1545                 flow->counter = NULL;
1546         }
1547 }
1548
1549 /**
1550  * Remove the flow from the NIC and the memory.
1551  *
1552  * @param[in] dev
1553  *   Pointer to the Ethernet device structure.
1554  * @param[in, out] flow
1555  *   Pointer to flow structure.
1556  */
1557 static void
1558 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1559 {
1560         struct mlx5_flow *dev_flow;
1561
1562         if (!flow)
1563                 return;
1564         flow_verbs_remove(dev, flow);
1565         while (!LIST_EMPTY(&flow->dev_flows)) {
1566                 dev_flow = LIST_FIRST(&flow->dev_flows);
1567                 LIST_REMOVE(dev_flow, next);
1568                 rte_free(dev_flow);
1569         }
1570 }
1571
1572 /**
1573  * Apply the flow to the NIC.
1574  *
1575  * @param[in] dev
1576  *   Pointer to the Ethernet device structure.
1577  * @param[in, out] flow
1578  *   Pointer to flow structure.
1579  * @param[out] error
1580  *   Pointer to error structure.
1581  *
1582  * @return
1583  *   0 on success, a negative errno value otherwise and rte_errno is set.
1584  */
1585 static int
1586 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1587                  struct rte_flow_error *error)
1588 {
1589         struct mlx5_flow_verbs *verbs;
1590         struct mlx5_flow *dev_flow;
1591         int err;
1592
1593         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1594                 verbs = &dev_flow->verbs;
1595                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1596                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1597                         if (!verbs->hrxq) {
1598                                 rte_flow_error_set
1599                                         (error, errno,
1600                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1601                                          "cannot get drop hash queue");
1602                                 goto error;
1603                         }
1604                 } else {
1605                         struct mlx5_hrxq *hrxq;
1606
1607                         hrxq = mlx5_hrxq_get(dev, flow->key,
1608                                              MLX5_RSS_HASH_KEY_LEN,
1609                                              verbs->hash_fields,
1610                                              (*flow->queue),
1611                                              flow->rss.queue_num);
1612                         if (!hrxq)
1613                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1614                                                      MLX5_RSS_HASH_KEY_LEN,
1615                                                      verbs->hash_fields,
1616                                                      (*flow->queue),
1617                                                      flow->rss.queue_num,
1618                                                      !!(flow->layers &
1619                                                       MLX5_FLOW_LAYER_TUNNEL));
1620                         if (!hrxq) {
1621                                 rte_flow_error_set
1622                                         (error, rte_errno,
1623                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1624                                          "cannot get hash queue");
1625                                 goto error;
1626                         }
1627                         verbs->hrxq = hrxq;
1628                 }
1629                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1630                                                      verbs->attr);
1631                 if (!verbs->flow) {
1632                         rte_flow_error_set(error, errno,
1633                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1634                                            NULL,
1635                                            "hardware refuses to create flow");
1636                         goto error;
1637                 }
1638         }
1639         return 0;
1640 error:
1641         err = rte_errno; /* Save rte_errno before cleanup. */
1642         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1643                 verbs = &dev_flow->verbs;
1644                 if (verbs->hrxq) {
1645                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1646                                 mlx5_hrxq_drop_release(dev);
1647                         else
1648                                 mlx5_hrxq_release(dev, verbs->hrxq);
1649                         verbs->hrxq = NULL;
1650                 }
1651         }
1652         rte_errno = err; /* Restore rte_errno. */
1653         return -rte_errno;
1654 }
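/*
 * Note on the error path of flow_verbs_apply(): every hash Rx queue already
 * taken by a sub-flow is released (the dedicated drop queue for DROP flows,
 * the regular hash Rx queue otherwise) and rte_errno is preserved across the
 * cleanup so the caller still sees the original failure cause.
 */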
1655
1656 /**
1657  * Query a flow counter.
1658  *
1659  * @see rte_flow_query()
1660  * @see rte_flow_ops
1661  */
1662 static int
1663 flow_verbs_query_count(struct rte_eth_dev *dev __rte_unused,
1664                        struct rte_flow *flow __rte_unused,
1665                        void *data __rte_unused,
1666                        struct rte_flow_error *error)
1667 {
1668 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
1669         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
1670                 struct rte_flow_query_count *qc = data;
1671                 uint64_t counters[2] = {0, 0};
1672                 struct ibv_query_counter_set_attr query_cs_attr = {
1673                         .cs = flow->counter->cs,
1674                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
1675                 };
1676                 struct ibv_counter_set_data query_out = {
1677                         .out = counters,
1678                         .outlen = 2 * sizeof(uint64_t),
1679                 };
1680                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
1681                                                        &query_out);
1682
1683                 if (err)
1684                         return rte_flow_error_set
1685                                 (error, err,
1686                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1687                                  NULL,
1688                                  "cannot read counter");
1689                 qc->hits_set = 1;
1690                 qc->bytes_set = 1;
1691                 qc->hits = counters[0] - flow->counter->hits;
1692                 qc->bytes = counters[1] - flow->counter->bytes;
1693                 if (qc->reset) {
1694                         flow->counter->hits = counters[0];
1695                         flow->counter->bytes = counters[1];
1696                 }
1697                 return 0;
1698         }
1699         return rte_flow_error_set(error, EINVAL,
1700                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1701                                   NULL,
1702                                   "flow does not have counter");
1703 #endif
1704         return rte_flow_error_set(error, ENOTSUP,
1705                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1706                                   NULL,
1707                                   "counters are not available");
1708 }
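/*
 * Counter values returned by flow_verbs_query_count() are deltas: the raw
 * hits/bytes read from the counter set are reported relative to the values
 * saved in the mlx5_flow_counter at the last reset, and a query with
 * qc->reset set re-baselines those saved values.
 */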
1709
1710 /**
1711  * Query a flow.
1712  *
1713  * @see rte_flow_query()
1714  * @see rte_flow_ops
1715  */
1716 static int
1717 flow_verbs_query(struct rte_eth_dev *dev,
1718                  struct rte_flow *flow,
1719                  const struct rte_flow_action *actions,
1720                  void *data,
1721                  struct rte_flow_error *error)
1722 {
1723         int ret = -EINVAL;
1724
1725         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1726                 switch (actions->type) {
1727                 case RTE_FLOW_ACTION_TYPE_VOID:
1728                         break;
1729                 case RTE_FLOW_ACTION_TYPE_COUNT:
1730                         ret = flow_verbs_query_count(dev, flow, data, error);
1731                         break;
1732                 default:
1733                         return rte_flow_error_set(error, ENOTSUP,
1734                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1735                                                   actions,
1736                                                   "action not supported");
1737                 }
1738         }
1739         return ret;
1740 }
1741
1742 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1743         .validate = flow_verbs_validate,
1744         .prepare = flow_verbs_prepare,
1745         .translate = flow_verbs_translate,
1746         .apply = flow_verbs_apply,
1747         .remove = flow_verbs_remove,
1748         .destroy = flow_verbs_destroy,
1749         .query = flow_verbs_query,
1750 };
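/*
 * Illustrative call sequence only (a sketch of how the generic mlx5 flow
 * layer is expected to drive these callbacks; the local variable names are
 * hypothetical):
 *
 *   const struct mlx5_flow_driver_ops *fops = &mlx5_flow_verbs_drv_ops;
 *   uint64_t item_flags = 0;
 *   uint64_t action_flags = 0;
 *   struct mlx5_flow *dev_flow;
 *
 *   dev_flow = fops->prepare(attr, items, actions,
 *                            &item_flags, &action_flags, error);
 *   if (!dev_flow)
 *           return -rte_errno;
 *   dev_flow->flow = flow;
 *   LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
 *   if (fops->translate(dev, dev_flow, attr, items, actions, error) ||
 *       fops->apply(dev, flow, error)) {
 *           fops->destroy(dev, flow);
 *           return -rte_errno;
 *   }
 */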