net/mlx5: relocate flow counters query function
[dpdk.git] drivers/net/mlx5/mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
36 /**
37  * Get a flow counter. A matching shared counter is reused when possible.
38  *
39  * @param[in] dev
40  *   Pointer to the Ethernet device structure.
41  * @param[in] shared
42  *   Indicate if this counter is shared with other flows.
43  * @param[in] id
44  *   Counter identifier.
45  *
46  * @return
47  *   A pointer to the counter, NULL otherwise and rte_errno is set.
48  */
49 static struct mlx5_flow_counter *
50 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
51 {
52         struct priv *priv = dev->data->dev_private;
53         struct mlx5_flow_counter *cnt;
54
55         LIST_FOREACH(cnt, &priv->flow_counters, next) {
56                 if (!cnt->shared || cnt->shared != shared)
57                         continue;
58                 if (cnt->id != id)
59                         continue;
60                 cnt->ref_cnt++;
61                 return cnt;
62         }
63 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
64
65         struct mlx5_flow_counter tmpl = {
66                 .shared = shared,
67                 .id = id,
68                 .cs = mlx5_glue->create_counter_set
69                         (priv->ctx,
70                          &(struct ibv_counter_set_init_attr){
71                                  .counter_set_id = id,
72                          }),
73                 .hits = 0,
74                 .bytes = 0,
75                 .ref_cnt = 1,
76         };
77
78         if (!tmpl.cs) {
79                 rte_errno = errno;
80                 return NULL;
81         }
82         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
83         if (!cnt) {
84                 claim_zero(mlx5_glue->destroy_counter_set(tmpl.cs));
85                 rte_errno = ENOMEM;
86                 return NULL;
87         }
88         *cnt = tmpl;
89         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
90         return cnt;
91 #endif
92         rte_errno = ENOTSUP;
93         return NULL;
94 }
95
96 /**
97  * Release a flow counter.
98  *
99  * @param[in] counter
100  *   Pointer to the counter handle.
101  */
102 static void
103 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
104 {
105         if (--counter->ref_cnt == 0) {
106                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
107                 LIST_REMOVE(counter, next);
108                 rte_free(counter);
109         }
110 }
111
112 /**
113  * Query a flow counter via Verbs library call.
114  *
115  * @see rte_flow_query()
116  * @see rte_flow_ops
117  */
118 static int
119 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
120                          struct rte_flow *flow __rte_unused,
121                          void *data __rte_unused,
122                          struct rte_flow_error *error)
123 {
124 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
125         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
126                 struct rte_flow_query_count *qc = data;
127                 uint64_t counters[2] = {0, 0};
128                 struct ibv_query_counter_set_attr query_cs_attr = {
129                         .cs = flow->counter->cs,
130                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
131                 };
132                 struct ibv_counter_set_data query_out = {
133                         .out = counters,
134                         .outlen = 2 * sizeof(uint64_t),
135                 };
136                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
137                                                        &query_out);
138
139                 if (err)
140                         return rte_flow_error_set
141                                 (error, err,
142                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
143                                  NULL,
144                                  "cannot read counter");
145                 qc->hits_set = 1;
146                 qc->bytes_set = 1;
147                 qc->hits = counters[0] - flow->counter->hits;
148                 qc->bytes = counters[1] - flow->counter->bytes;
149                 if (qc->reset) {
150                         flow->counter->hits = counters[0];
151                         flow->counter->bytes = counters[1];
152                 }
153                 return 0;
154         }
155         return rte_flow_error_set(error, EINVAL,
156                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
157                                   NULL,
158                                   "flow does not have counter");
159 #else
160         return rte_flow_error_set(error, ENOTSUP,
161                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
162                                   NULL,
163                                   "counters are not available");
164 #endif
165 }
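
/*
 * Illustrative sketch only, not part of this file: an application owning a
 * flow created with a COUNT action would typically reach the callback above
 * through rte_flow_query().  The port_id and flow variables are placeholders
 * assumed for the example.
 *
 *	struct rte_flow_query_count qc = { .reset = 1 };
 *	struct rte_flow_action count_action = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *	};
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_query(port_id, flow, &count_action, &qc, &err) == 0)
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       qc.hits, qc.bytes);
 */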
166
167 /**
168  * Add a verbs item specification into @p flow.
169  *
170  * @param[in, out] flow
171  *   Pointer to flow structure.
172  * @param[in] src
173  *   Specification to copy into the flow.
174  * @param[in] size
175  *   Size in bytes of the specification to copy.
176  */
177 static void
178 flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
179 {
180         struct mlx5_flow_verbs *verbs = &flow->verbs;
181
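        /*
         * The copy below happens only once a specification buffer has been
         * attached to the flow; the size is accumulated unconditionally so
         * verbs->size always reflects the bytes the specifications require.
         */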
182         if (verbs->specs) {
183                 void *dst;
184
185                 dst = (void *)(verbs->specs + verbs->size);
186                 memcpy(dst, src, size);
187                 ++verbs->attr->num_of_specs;
188         }
189         verbs->size += size;
190 }
191
192 /**
193  * Convert the @p item into a Verbs specification. This function assumes that
194  * the input is valid and that there is space to insert the requested item
195  * into the flow.
196  *
197  * @param[in] item
198  *   Item specification.
199  * @param[in] item_flags
200  *   Bit field with all detected items.
201  * @param[in, out] dev_flow
202  *   Pointer to dev_flow structure.
203  */
204 static void
205 flow_verbs_translate_item_eth(const struct rte_flow_item *item,
206                               uint64_t *item_flags,
207                               struct mlx5_flow *dev_flow)
208 {
209         const struct rte_flow_item_eth *spec = item->spec;
210         const struct rte_flow_item_eth *mask = item->mask;
211         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
212         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
213         struct ibv_flow_spec_eth eth = {
214                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
215                 .size = size,
216         };
217
218         if (!mask)
219                 mask = &rte_flow_item_eth_mask;
220         if (spec) {
221                 unsigned int i;
222
223                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
224                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
225                 eth.val.ether_type = spec->type;
226                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
227                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
228                 eth.mask.ether_type = mask->type;
229                 /* Remove unwanted bits from values. */
230                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
231                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
232                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
233                 }
234                 eth.val.ether_type &= eth.mask.ether_type;
235                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
236         }
237         flow_verbs_spec_add(dev_flow, &eth, size);
238         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
239                                 MLX5_FLOW_LAYER_OUTER_L2;
240 }
241
242 /**
243  * Update the VLAN tag in the Verbs Ethernet specification.
244  * This function assumes that the input is valid and there is space to add
245  * the requested item.
246  *
247  * @param[in, out] attr
248  *   Pointer to Verbs attributes structure.
249  * @param[in] eth
250  *   Verbs structure containing the VLAN information to copy.
251  */
252 static void
253 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
254                             struct ibv_flow_spec_eth *eth)
255 {
256         unsigned int i;
257         const enum ibv_flow_spec_type search = eth->type;
258         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
259                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
260
261         for (i = 0; i != attr->num_of_specs; ++i) {
262                 if (hdr->type == search) {
263                         struct ibv_flow_spec_eth *e =
264                                 (struct ibv_flow_spec_eth *)hdr;
265
266                         e->val.vlan_tag = eth->val.vlan_tag;
267                         e->mask.vlan_tag = eth->mask.vlan_tag;
268                         e->val.ether_type = eth->val.ether_type;
269                         e->mask.ether_type = eth->mask.ether_type;
270                         break;
271                 }
272                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
273         }
274 }
275
276 /**
277  * Convert the @p item into a Verbs specification. This function assumes that
278  * the input is valid and that there is space to insert the requested item
279  * into the flow.
280  *
281  * @param[in] item
282  *   Item specification.
283  * @param[in, out] item_flags
284  *   Bit mask that holds all detected items.
285  * @param[in, out] dev_flow
286  *   Pointer to dev_flow structure.
287  */
288 static void
289 flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
290                                uint64_t *item_flags,
291                                struct mlx5_flow *dev_flow)
292 {
293         const struct rte_flow_item_vlan *spec = item->spec;
294         const struct rte_flow_item_vlan *mask = item->mask;
295         unsigned int size = sizeof(struct ibv_flow_spec_eth);
296         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
297         struct ibv_flow_spec_eth eth = {
298                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
299                 .size = size,
300         };
301         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
302                                       MLX5_FLOW_LAYER_OUTER_L2;
303
304         if (!mask)
305                 mask = &rte_flow_item_vlan_mask;
306         if (spec) {
307                 eth.val.vlan_tag = spec->tci;
308                 eth.mask.vlan_tag = mask->tci;
309                 eth.val.vlan_tag &= eth.mask.vlan_tag;
310                 eth.val.ether_type = spec->inner_type;
311                 eth.mask.ether_type = mask->inner_type;
312                 eth.val.ether_type &= eth.mask.ether_type;
313         }
314         if (!(*item_flags & l2m)) {
315                 dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
316                 flow_verbs_spec_add(dev_flow, &eth, size);
317         } else {
318                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
319                 size = 0; /* Only an update is done in eth specification. */
320         }
321         *item_flags |= tunnel ?
322                        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
323                        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
324 }
325
326 /**
327  * Convert the @p item into a Verbs specification. This function assumes that
328  * the input is valid and that there is space to insert the requested item
329  * into the flow.
330  *
331  * @param[in] item
332  *   Item specification.
333  * @param[in, out] item_flags
334  *   Bit mask that marks all detected items.
335  * @param[in, out] dev_flow
336  *   Pointer to dev_flow structure.
337  */
338 static void
339 flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
340                                uint64_t *item_flags,
341                                struct mlx5_flow *dev_flow)
342 {
343         const struct rte_flow_item_ipv4 *spec = item->spec;
344         const struct rte_flow_item_ipv4 *mask = item->mask;
345         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
346         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
347         struct ibv_flow_spec_ipv4_ext ipv4 = {
348                 .type = IBV_FLOW_SPEC_IPV4_EXT |
349                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
350                 .size = size,
351         };
352
353         if (!mask)
354                 mask = &rte_flow_item_ipv4_mask;
355         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
356                                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
357         if (spec) {
358                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
359                         .src_ip = spec->hdr.src_addr,
360                         .dst_ip = spec->hdr.dst_addr,
361                         .proto = spec->hdr.next_proto_id,
362                         .tos = spec->hdr.type_of_service,
363                 };
364                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
365                         .src_ip = mask->hdr.src_addr,
366                         .dst_ip = mask->hdr.dst_addr,
367                         .proto = mask->hdr.next_proto_id,
368                         .tos = mask->hdr.type_of_service,
369                 };
370                 /* Remove unwanted bits from values. */
371                 ipv4.val.src_ip &= ipv4.mask.src_ip;
372                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
373                 ipv4.val.proto &= ipv4.mask.proto;
374                 ipv4.val.tos &= ipv4.mask.tos;
375         }
376         dev_flow->verbs.hash_fields |=
377                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
378                                             MLX5_IPV4_LAYER_TYPES,
379                                             MLX5_IPV4_IBV_RX_HASH);
380         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
381         flow_verbs_spec_add(dev_flow, &ipv4, size);
382 }
383
384 /**
385  * Convert the @p item into a Verbs specification. This function assumes that
386  * the input is valid and that there is space to insert the requested item
387  * into the flow.
388  *
389  * @param[in] item
390  *   Item specification.
391  * @param[in, out] item_flags
392  *   Bit mask that marks all detected items.
393  * @param[in, out] dev_flow
394  *   Pointer to dev_flow structure.
395  */
396 static void
397 flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
398                                uint64_t *item_flags,
399                                struct mlx5_flow *dev_flow)
400 {
401         const struct rte_flow_item_ipv6 *spec = item->spec;
402         const struct rte_flow_item_ipv6 *mask = item->mask;
403         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
404         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
405         struct ibv_flow_spec_ipv6 ipv6 = {
406                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
407                 .size = size,
408         };
409
410         if (!mask)
411                 mask = &rte_flow_item_ipv6_mask;
412         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
413                                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
414         if (spec) {
415                 unsigned int i;
416                 uint32_t vtc_flow_val;
417                 uint32_t vtc_flow_mask;
418
419                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
420                        RTE_DIM(ipv6.val.src_ip));
421                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
422                        RTE_DIM(ipv6.val.dst_ip));
423                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
424                        RTE_DIM(ipv6.mask.src_ip));
425                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
426                        RTE_DIM(ipv6.mask.dst_ip));
427                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
428                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
429                 ipv6.val.flow_label =
430                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
431                                          IPV6_HDR_FL_SHIFT);
432                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
433                                          IPV6_HDR_TC_SHIFT;
434                 ipv6.val.next_hdr = spec->hdr.proto;
435                 ipv6.val.hop_limit = spec->hdr.hop_limits;
436                 ipv6.mask.flow_label =
437                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
438                                          IPV6_HDR_FL_SHIFT);
439                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
440                                           IPV6_HDR_TC_SHIFT;
441                 ipv6.mask.next_hdr = mask->hdr.proto;
442                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
443                 /* Remove unwanted bits from values. */
444                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
445                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
446                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
447                 }
448                 ipv6.val.flow_label &= ipv6.mask.flow_label;
449                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
450                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
451                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
452         }
453         dev_flow->verbs.hash_fields |=
454                 mlx5_flow_hashfields_adjust(dev_flow, tunnel,
455                                             MLX5_IPV6_LAYER_TYPES,
456                                             MLX5_IPV6_IBV_RX_HASH);
457         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
458         flow_verbs_spec_add(dev_flow, &ipv6, size);
459 }
460
461 /**
462  * Convert the @p item into a Verbs specification. This function assumes that
463  * the input is valid and that there is space to insert the requested item
464  * into the flow.
465  *
466  * @param[in] item
467  *   Item specification.
468  * @param[in, out] item_flags
469  *   Bit mask that marks all detected items.
470  * @param[in, out] dev_flow
471  *   Pointer to dev_flow structure.
472  */
473 static void
474 flow_verbs_translate_item_udp(const struct rte_flow_item *item,
475                               uint64_t *item_flags,
476                               struct mlx5_flow *dev_flow)
477 {
478         const struct rte_flow_item_udp *spec = item->spec;
479         const struct rte_flow_item_udp *mask = item->mask;
480         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
481         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
482         struct ibv_flow_spec_tcp_udp udp = {
483                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
484                 .size = size,
485         };
486
487         if (!mask)
488                 mask = &rte_flow_item_udp_mask;
489         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
490                                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
491         if (spec) {
492                 udp.val.dst_port = spec->hdr.dst_port;
493                 udp.val.src_port = spec->hdr.src_port;
494                 udp.mask.dst_port = mask->hdr.dst_port;
495                 udp.mask.src_port = mask->hdr.src_port;
496                 /* Remove unwanted bits from values. */
497                 udp.val.src_port &= udp.mask.src_port;
498                 udp.val.dst_port &= udp.mask.dst_port;
499         }
500         dev_flow->verbs.hash_fields |=
501                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
502                                             (IBV_RX_HASH_SRC_PORT_UDP |
503                                              IBV_RX_HASH_DST_PORT_UDP));
504         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
505         flow_verbs_spec_add(dev_flow, &udp, size);
506 }
507
508 /**
509  * Convert the @p item into a Verbs specification. This function assumes that
510  * the input is valid and that there is space to insert the requested item
511  * into the flow.
512  *
513  * @param[in] item
514  *   Item specification.
515  * @param[in, out] item_flags
516  *   Bit mask that marks all detected items.
517  * @param[in, out] dev_flow
518  *   Pointer to dev_flow structure.
519  */
520 static void
521 flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
522                               uint64_t *item_flags,
523                               struct mlx5_flow *dev_flow)
524 {
525         const struct rte_flow_item_tcp *spec = item->spec;
526         const struct rte_flow_item_tcp *mask = item->mask;
527         const int tunnel = !!(dev_flow->flow->layers & MLX5_FLOW_LAYER_TUNNEL);
528         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
529         struct ibv_flow_spec_tcp_udp tcp = {
530                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
531                 .size = size,
532         };
533
534         if (!mask)
535                 mask = &rte_flow_item_tcp_mask;
536         *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
537                                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
538         if (spec) {
539                 tcp.val.dst_port = spec->hdr.dst_port;
540                 tcp.val.src_port = spec->hdr.src_port;
541                 tcp.mask.dst_port = mask->hdr.dst_port;
542                 tcp.mask.src_port = mask->hdr.src_port;
543                 /* Remove unwanted bits from values. */
544                 tcp.val.src_port &= tcp.mask.src_port;
545                 tcp.val.dst_port &= tcp.mask.dst_port;
546         }
547         dev_flow->verbs.hash_fields |=
548                 mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
549                                             (IBV_RX_HASH_SRC_PORT_TCP |
550                                              IBV_RX_HASH_DST_PORT_TCP));
551         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
552         flow_verbs_spec_add(dev_flow, &tcp, size);
553 }
554
555 /**
556  * Convert the @p item into a Verbs specification. This function assumes that
557  * the input is valid and that there is space to insert the requested item
558  * into the flow.
559  *
560  * @param[in] item
561  *   Item specification.
562  * @param[in, out] item_flags
563  *   Bit mask that marks all detected items.
564  * @param[in, out] dev_flow
565  *   Pointer to dev_flow structure.
566  */
567 static void
568 flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
569                                 uint64_t *item_flags,
570                                 struct mlx5_flow *dev_flow)
571 {
572         const struct rte_flow_item_vxlan *spec = item->spec;
573         const struct rte_flow_item_vxlan *mask = item->mask;
574         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
575         struct ibv_flow_spec_tunnel vxlan = {
576                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
577                 .size = size,
578         };
579         union vni {
580                 uint32_t vlan_id;
581                 uint8_t vni[4];
582         } id = { .vlan_id = 0, };
583
584         if (!mask)
585                 mask = &rte_flow_item_vxlan_mask;
586         if (spec) {
587                 memcpy(&id.vni[1], spec->vni, 3);
588                 vxlan.val.tunnel_id = id.vlan_id;
589                 memcpy(&id.vni[1], mask->vni, 3);
590                 vxlan.mask.tunnel_id = id.vlan_id;
591                 /* Remove unwanted bits from values. */
592                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
593         }
594         flow_verbs_spec_add(dev_flow, &vxlan, size);
595         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
596         *item_flags |= MLX5_FLOW_LAYER_VXLAN;
597 }
598
599 /**
600  * Convert the @p item into a Verbs specification. This function assumes that
601  * the input is valid and that there is space to insert the requested item
602  * into the flow.
603  *
604  * @param[in] item
605  *   Item specification.
606  * @param[in, out] item_flags
607  *   Bit mask that marks all detected items.
608  * @param[in, out] dev_flow
609  *   Pointer to dev_flow structure.
610  */
611 static void
612 flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
613                                     uint64_t *item_flags,
614                                     struct mlx5_flow *dev_flow)
615 {
616         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
617         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
618         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
619         struct ibv_flow_spec_tunnel vxlan_gpe = {
620                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
621                 .size = size,
622         };
623         union vni {
624                 uint32_t vlan_id;
625                 uint8_t vni[4];
626         } id = { .vlan_id = 0, };
627
628         if (!mask)
629                 mask = &rte_flow_item_vxlan_gpe_mask;
630         if (spec) {
631                 memcpy(&id.vni[1], spec->vni, 3);
632                 vxlan_gpe.val.tunnel_id = id.vlan_id;
633                 memcpy(&id.vni[1], mask->vni, 3);
634                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
635                 /* Remove unwanted bits from values. */
636                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
637         }
638         flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
639         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
640         *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
641 }
642
643 /**
644  * Update the protocol in Verbs IPv4/IPv6 spec.
645  *
646  * @param[in, out] attr
647  *   Pointer to Verbs attributes structure.
648  * @param[in] search
649  *   Specification type to search in order to update the IP protocol.
650  * @param[in] protocol
651  *   Protocol value to set if none is present in the specification.
652  */
653 static void
654 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
655                                        enum ibv_flow_spec_type search,
656                                        uint8_t protocol)
657 {
658         unsigned int i;
659         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
660                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
661
662         if (!attr)
663                 return;
664         for (i = 0; i != attr->num_of_specs; ++i) {
665                 if (hdr->type == search) {
666                         union {
667                                 struct ibv_flow_spec_ipv4_ext *ipv4;
668                                 struct ibv_flow_spec_ipv6 *ipv6;
669                         } ip;
670
671                         switch (search) {
672                         case IBV_FLOW_SPEC_IPV4_EXT:
673                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
674                                 if (!ip.ipv4->val.proto) {
675                                         ip.ipv4->val.proto = protocol;
676                                         ip.ipv4->mask.proto = 0xff;
677                                 }
678                                 break;
679                         case IBV_FLOW_SPEC_IPV6:
680                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
681                                 if (!ip.ipv6->val.next_hdr) {
682                                         ip.ipv6->val.next_hdr = protocol;
683                                         ip.ipv6->mask.next_hdr = 0xff;
684                                 }
685                                 break;
686                         default:
687                                 break;
688                         }
689                         break;
690                 }
691                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
692         }
693 }
694
695 /**
696  * Convert the @p item into a Verbs specification. This function assumes that
697  * the input is valid and that there is space to insert the requested item
698  * into the flow.
699  *
700  * @param[in] item
701  *   Item specification.
702  * @param[in, out] item_flags
703  *   Bit mask that marks all detected items.
704  * @param[in, out] dev_flow
705  *   Pointer to dev_flow structure.
706  */
707 static void
708 flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
709                               uint64_t *item_flags,
710                               struct mlx5_flow *dev_flow)
711 {
712         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
713 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
714         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
715         struct ibv_flow_spec_tunnel tunnel = {
716                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
717                 .size = size,
718         };
719 #else
720         const struct rte_flow_item_gre *spec = item->spec;
721         const struct rte_flow_item_gre *mask = item->mask;
722         unsigned int size = sizeof(struct ibv_flow_spec_gre);
723         struct ibv_flow_spec_gre tunnel = {
724                 .type = IBV_FLOW_SPEC_GRE,
725                 .size = size,
726         };
727
728         if (!mask)
729                 mask = &rte_flow_item_gre_mask;
730         if (spec) {
731                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
732                 tunnel.val.protocol = spec->protocol;
733                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
734                 tunnel.mask.protocol = mask->protocol;
735                 /* Remove unwanted bits from values. */
736                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
737                 tunnel.val.protocol &= tunnel.mask.protocol;
738                 tunnel.val.key &= tunnel.mask.key;
739         }
740 #endif
741         if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
742                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
743                                                        IBV_FLOW_SPEC_IPV4_EXT,
744                                                        IPPROTO_GRE);
745         else
746                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
747                                                        IBV_FLOW_SPEC_IPV6,
748                                                        IPPROTO_GRE);
749         flow_verbs_spec_add(dev_flow, &tunnel, size);
750         verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
751         *item_flags |= MLX5_FLOW_LAYER_GRE;
752 }
753
754 /**
755  * Convert the @p item into a Verbs specification. This function assumes that
756  * the input is valid and that there is space to insert the requested item
757  * into the flow.
758  *
759  * @param[in] item
760  *   Item specification.
761  * @param[in, out] item_flags
762  *   Bit mask that marks all detected items.
763  * @param[in, out] dev_flow
764  *   Pointer to dev_flow structure.
765  */
766 static void
767 flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
768                                uint64_t *action_flags __rte_unused,
769                                struct mlx5_flow *dev_flow __rte_unused)
770 {
771 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
772         const struct rte_flow_item_mpls *spec = item->spec;
773         const struct rte_flow_item_mpls *mask = item->mask;
774         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
775         struct ibv_flow_spec_mpls mpls = {
776                 .type = IBV_FLOW_SPEC_MPLS,
777                 .size = size,
778         };
779
780         if (!mask)
781                 mask = &rte_flow_item_mpls_mask;
782         if (spec) {
783                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
784                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
785                 /* Remove unwanted bits from values.  */
786                 mpls.val.label &= mpls.mask.label;
787         }
788         flow_verbs_spec_add(dev_flow, &mpls, size);
789         dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
790         *action_flags |= MLX5_FLOW_LAYER_MPLS;
791 #endif
792 }
793
794 /**
795  * Convert the @p action into a Verbs specification. This function assumes that
796  * the input is valid and that there is space to insert the requested action
797  * into the flow. The detected action is also recorded in @p action_flags.
798  *
799  * @param[in, out] action_flags
800  *   Pointer to the detected actions.
801  * @param[in] dev_flow
802  *   Pointer to mlx5_flow.
803  */
804 static void
805 flow_verbs_translate_action_drop(uint64_t *action_flags,
806                                  struct mlx5_flow *dev_flow)
807 {
808         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
809         struct ibv_flow_spec_action_drop drop = {
810                         .type = IBV_FLOW_SPEC_ACTION_DROP,
811                         .size = size,
812         };
813
814         flow_verbs_spec_add(dev_flow, &drop, size);
815         *action_flags |= MLX5_FLOW_ACTION_DROP;
816 }
817
818 /**
819  * Convert the @p action into a Verbs specification. This function assumes that
820  * the input is valid and that there is space to insert the requested action
821  * into the flow. The detected action is also recorded in @p action_flags.
822  *
823  * @param[in] action
824  *   Action configuration.
825  * @param[in, out] action_flags
826  *   Pointer to the detected actions.
827  * @param[in] dev_flow
828  *   Pointer to mlx5_flow.
829  */
830 static void
831 flow_verbs_translate_action_queue(const struct rte_flow_action *action,
832                                   uint64_t *action_flags,
833                                   struct mlx5_flow *dev_flow)
834 {
835         const struct rte_flow_action_queue *queue = action->conf;
836         struct rte_flow *flow = dev_flow->flow;
837
838         if (flow->queue)
839                 (*flow->queue)[0] = queue->index;
840         flow->rss.queue_num = 1;
841         *action_flags |= MLX5_FLOW_ACTION_QUEUE;
842 }
843
844 /**
845  * Convert the @p action into a Verbs specification. This function assumes that
846  * the input is valid and that there is space to insert the requested action
847  * into the flow. The detected action is also recorded in @p action_flags.
848  *
849  * @param[in] action
850  *   Action configuration.
851  * @param[in, out] action_flags
852  *   Pointer to the detected actions.
853  * @param[in] dev_flow
854  *   Pointer to mlx5_flow.
855  */
856 static void
857 flow_verbs_translate_action_rss(const struct rte_flow_action *action,
858                                 uint64_t *action_flags,
859                                 struct mlx5_flow *dev_flow)
860 {
861         const struct rte_flow_action_rss *rss = action->conf;
862         struct rte_flow *flow = dev_flow->flow;
863
864         if (flow->queue)
865                 memcpy((*flow->queue), rss->queue,
866                        rss->queue_num * sizeof(uint16_t));
867         flow->rss.queue_num = rss->queue_num;
868         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
869         flow->rss.types = rss->types;
870         flow->rss.level = rss->level;
871         *action_flags |= MLX5_FLOW_ACTION_RSS;
872 }
873
874 /**
875  * Convert the @p action into a Verbs specification. This function assumes that
876  * the input is valid and that there is space to insert the requested action
877  * into the flow. The detected action is also recorded in @p action_flags.
878  *
879  * @param[in] action
880  *   Action configuration.
881  * @param[in, out] action_flags
882  *   Pointer to the detected actions.
883  * @param[in] dev_flow
884  *   Pointer to mlx5_flow.
885  */
886 static void
887 flow_verbs_translate_action_flag
888                         (const struct rte_flow_action *action __rte_unused,
889                          uint64_t *action_flags,
890                          struct mlx5_flow *dev_flow)
891 {
892         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
893         struct ibv_flow_spec_action_tag tag = {
894                 .type = IBV_FLOW_SPEC_ACTION_TAG,
895                 .size = size,
896                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
897         };
898         *action_flags |= MLX5_FLOW_ACTION_MARK;
899         flow_verbs_spec_add(dev_flow, &tag, size);
900 }
901
902 /**
903  * Update verbs specification to modify the flag to mark.
904  *
905  * @param[in, out] verbs
906  *   Pointer to the mlx5_flow_verbs structure.
907  * @param[in] mark_id
908  *   Mark identifier to replace the flag.
909  */
910 static void
911 flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
912 {
913         struct ibv_spec_header *hdr;
914         int i;
915
916         if (!verbs)
917                 return;
918         /* Update Verbs specification. */
919         hdr = (struct ibv_spec_header *)verbs->specs;
920         if (!hdr)
921                 return;
922         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
923                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
924                         struct ibv_flow_spec_action_tag *t =
925                                 (struct ibv_flow_spec_action_tag *)hdr;
926
927                         t->tag_id = mlx5_flow_mark_set(mark_id);
928                 }
929                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
930         }
931 }
932
933 /**
934  * Convert the @p action into a Verbs specification. This function assumes that
935  * the input is valid and that there is space to insert the requested action
936  * into the flow. The detected action is also recorded in @p action_flags.
937  *
938  * @param[in] action
939  *   Action configuration.
940  * @param[in, out] action_flags
941  *   Pointer to the detected actions.
942  * @param[in] dev_flow
943  *   Pointer to mlx5_flow.
944  */
945 static void
946 flow_verbs_translate_action_mark(const struct rte_flow_action *action,
947                                  uint64_t *action_flags,
948                                  struct mlx5_flow *dev_flow)
949 {
950         const struct rte_flow_action_mark *mark = action->conf;
951         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
952         struct ibv_flow_spec_action_tag tag = {
953                 .type = IBV_FLOW_SPEC_ACTION_TAG,
954                 .size = size,
955         };
956         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
957
958         if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
959                 flow_verbs_mark_update(verbs, mark->id);
960                 size = 0;
961         } else {
962                 tag.tag_id = mlx5_flow_mark_set(mark->id);
963                 flow_verbs_spec_add(dev_flow, &tag, size);
964         }
965         *action_flags |= MLX5_FLOW_ACTION_MARK;
966 }
967
968 /**
969  * Convert the @p action into a Verbs specification. This function assumes that
970  * the input is valid and that there is space to insert the requested action
971  * into the flow. The detected action is also recorded in @p action_flags.
972  *
973  * @param[in] dev
974  *   Pointer to the Ethernet device structure.
975  * @param[in] action
976  *   Action configuration.
977  * @param[in, out] action_flags
978  *   Pointer to the detected actions.
979  * @param[in] dev_flow
980  *   Pointer to mlx5_flow.
981  * @param[out] error
982  *   Pointer to error structure.
983  *
984  * @return
985  *   0 on success, a negative errno value otherwise and rte_errno is set.
986  */
987 static int
988 flow_verbs_translate_action_count(struct rte_eth_dev *dev,
989                                   const struct rte_flow_action *action,
990                                   uint64_t *action_flags,
991                                   struct mlx5_flow *dev_flow,
992                                   struct rte_flow_error *error)
993 {
994         const struct rte_flow_action_count *count = action->conf;
995         struct rte_flow *flow = dev_flow->flow;
996 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
997         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
998         struct ibv_flow_spec_counter_action counter = {
999                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1000                 .size = size,
1001         };
1002 #endif
1003
1004         if (!flow->counter) {
1005                 flow->counter = flow_verbs_counter_new(dev, count->shared,
1006                                                        count->id);
1007                 if (!flow->counter)
1008                         return rte_flow_error_set(error, rte_errno,
1009                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1010                                                   action,
1011                                                   "cannot get counter"
1012                                                   " context.");
1013         }
1014         *action_flags |= MLX5_FLOW_ACTION_COUNT;
1015 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
1016         counter.counter_set_handle = flow->counter->cs->handle;
1017         flow_verbs_spec_add(dev_flow, &counter, size);
1018 #endif
1019         return 0;
1020 }
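
/*
 * Illustrative sketch only, assumed application-side usage rather than part
 * of this driver: a rule handled by the translation above could be created
 * roughly as follows, with port_id, attr and pattern[] being placeholders.
 *
 *	struct rte_flow_action_count count_conf = { .shared = 0, .id = 0 };
 *	struct rte_flow_action_queue queue_conf = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &error);
 */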
1021
1022 /**
1023  * Internal validation function, used to validate both actions and items.
1024  *
1025  * @param[in] dev
1026  *   Pointer to the Ethernet device structure.
1027  * @param[in] attr
1028  *   Pointer to the flow attributes.
1029  * @param[in] items
1030  *   Pointer to the list of items.
1031  * @param[in] actions
1032  *   Pointer to the list of actions.
1033  * @param[out] error
1034  *   Pointer to the error structure.
1035  *
1036  * @return
1037  *   0 on success, a negative errno value otherwise and rte_errno is set.
1038  */
1039 static int
1040 flow_verbs_validate(struct rte_eth_dev *dev,
1041                     const struct rte_flow_attr *attr,
1042                     const struct rte_flow_item items[],
1043                     const struct rte_flow_action actions[],
1044                     struct rte_flow_error *error)
1045 {
1046         int ret;
1047         uint32_t action_flags = 0;
1048         uint32_t item_flags = 0;
1049         int tunnel = 0;
1050         uint8_t next_protocol = 0xff;
1051
1052         if (items == NULL)
1053                 return -1;
1054         ret = mlx5_flow_validate_attributes(dev, attr, error);
1055         if (ret < 0)
1056                 return ret;
1057         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1058                 int ret = 0;
1059                 switch (items->type) {
1060                 case RTE_FLOW_ITEM_TYPE_VOID:
1061                         break;
1062                 case RTE_FLOW_ITEM_TYPE_ETH:
1063                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1064                                                           error);
1065                         if (ret < 0)
1066                                 return ret;
1067                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1068                                                MLX5_FLOW_LAYER_OUTER_L2;
1069                         break;
1070                 case RTE_FLOW_ITEM_TYPE_VLAN:
1071                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1072                                                            error);
1073                         if (ret < 0)
1074                                 return ret;
1075                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1076                                                MLX5_FLOW_LAYER_OUTER_VLAN;
1077                         break;
1078                 case RTE_FLOW_ITEM_TYPE_IPV4:
1079                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1080                                                            error);
1081                         if (ret < 0)
1082                                 return ret;
1083                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1084                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1085                         if (items->mask != NULL &&
1086                             ((const struct rte_flow_item_ipv4 *)
1087                              items->mask)->hdr.next_proto_id)
1088                                 next_protocol =
1089                                         ((const struct rte_flow_item_ipv4 *)
1090                                          (items->spec))->hdr.next_proto_id;
1091                         break;
1092                 case RTE_FLOW_ITEM_TYPE_IPV6:
1093                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1094                                                            error);
1095                         if (ret < 0)
1096                                 return ret;
1097                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1098                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1099                         if (items->mask != NULL &&
1100                             ((const struct rte_flow_item_ipv6 *)
1101                              items->mask)->hdr.proto)
1102                                 next_protocol =
1103                                         ((const struct rte_flow_item_ipv6 *)
1104                                          items->spec)->hdr.proto;
1105                         break;
1106                 case RTE_FLOW_ITEM_TYPE_UDP:
1107                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1108                                                           next_protocol,
1109                                                           error);
1110                         if (ret < 0)
1111                                 return ret;
1112                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1113                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1114                         break;
1115                 case RTE_FLOW_ITEM_TYPE_TCP:
1116                         ret = mlx5_flow_validate_item_tcp
1117                                                 (items, item_flags,
1118                                                  next_protocol,
1119                                                  &rte_flow_item_tcp_mask,
1120                                                  error);
1121                         if (ret < 0)
1122                                 return ret;
1123                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1124                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1125                         break;
1126                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1127                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1128                                                             error);
1129                         if (ret < 0)
1130                                 return ret;
1131                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1132                         break;
1133                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1134                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1135                                                                 item_flags,
1136                                                                 dev, error);
1137                         if (ret < 0)
1138                                 return ret;
1139                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1140                         break;
1141                 case RTE_FLOW_ITEM_TYPE_GRE:
1142                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1143                                                           next_protocol, error);
1144                         if (ret < 0)
1145                                 return ret;
1146                         item_flags |= MLX5_FLOW_LAYER_GRE;
1147                         break;
1148                 case RTE_FLOW_ITEM_TYPE_MPLS:
1149                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
1150                                                            next_protocol,
1151                                                            error);
1152                         if (ret < 0)
1153                                 return ret;
1154                         if (next_protocol != 0xff &&
1155                             next_protocol != IPPROTO_MPLS)
1156                                 return rte_flow_error_set
1157                                         (error, EINVAL,
1158                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
1159                                          "protocol filtering not compatible"
1160                                          " with MPLS layer");
1161                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1162                         break;
1163                 default:
1164                         return rte_flow_error_set(error, ENOTSUP,
1165                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1166                                                   NULL, "item not supported");
1167                 }
1168         }
1169         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1170                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1171                 switch (actions->type) {
1172                 case RTE_FLOW_ACTION_TYPE_VOID:
1173                         break;
1174                 case RTE_FLOW_ACTION_TYPE_FLAG:
1175                         ret = mlx5_flow_validate_action_flag(action_flags,
1176                                                              attr,
1177                                                              error);
1178                         if (ret < 0)
1179                                 return ret;
1180                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1181                         break;
1182                 case RTE_FLOW_ACTION_TYPE_MARK:
1183                         ret = mlx5_flow_validate_action_mark(actions,
1184                                                              action_flags,
1185                                                              attr,
1186                                                              error);
1187                         if (ret < 0)
1188                                 return ret;
1189                         action_flags |= MLX5_FLOW_ACTION_MARK;
1190                         break;
1191                 case RTE_FLOW_ACTION_TYPE_DROP:
1192                         ret = mlx5_flow_validate_action_drop(action_flags,
1193                                                              attr,
1194                                                              error);
1195                         if (ret < 0)
1196                                 return ret;
1197                         action_flags |= MLX5_FLOW_ACTION_DROP;
1198                         break;
1199                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1200                         ret = mlx5_flow_validate_action_queue(actions,
1201                                                               action_flags, dev,
1202                                                               attr,
1203                                                               error);
1204                         if (ret < 0)
1205                                 return ret;
1206                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1207                         break;
1208                 case RTE_FLOW_ACTION_TYPE_RSS:
1209                         ret = mlx5_flow_validate_action_rss(actions,
1210                                                             action_flags, dev,
1211                                                             attr,
1212                                                             error);
1213                         if (ret < 0)
1214                                 return ret;
1215                         action_flags |= MLX5_FLOW_ACTION_RSS;
1216                         break;
1217                 case RTE_FLOW_ACTION_TYPE_COUNT:
1218                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1219                         if (ret < 0)
1220                                 return ret;
1221                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1222                         break;
1223                 default:
1224                         return rte_flow_error_set(error, ENOTSUP,
1225                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1226                                                   actions,
1227                                                   "action not supported");
1228                 }
1229         }
1230         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1231                 return rte_flow_error_set(error, EINVAL,
1232                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1233                                           "no fate action is found");
1234         return 0;
1235 }
1236
1237 /**
1238  * Calculate the size in bytes needed for the action part of the Verbs flow. It
1239  * also returns a bit-field with all the detected actions, in order to avoid
1240  * another iteration over the actions.
1241  *
1242  * @param[in] actions
1243  *   Pointer to the list of actions.
1244  * @param[out] action_flags
1245  *   Pointer to the detected actions.
1246  *
1247  * @return
1248  *   The size of the memory needed for all actions.
1249  */
1250 static int
1251 flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
1252                                 uint64_t *action_flags)
1253 {
1254         int size = 0;
1255         uint64_t detected_actions = 0;
1256
1257         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1258                 switch (actions->type) {
1259                 case RTE_FLOW_ACTION_TYPE_VOID:
1260                         break;
1261                 case RTE_FLOW_ACTION_TYPE_FLAG:
1262                         size += sizeof(struct ibv_flow_spec_action_tag);
1263                         detected_actions |= MLX5_FLOW_ACTION_FLAG;
1264                         break;
1265                 case RTE_FLOW_ACTION_TYPE_MARK:
1266                         size += sizeof(struct ibv_flow_spec_action_tag);
1267                         detected_actions |= MLX5_FLOW_ACTION_MARK;
1268                         break;
1269                 case RTE_FLOW_ACTION_TYPE_DROP:
1270                         size += sizeof(struct ibv_flow_spec_action_drop);
1271                         detected_actions |= MLX5_FLOW_ACTION_DROP;
1272                         break;
1273                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1274                         detected_actions |= MLX5_FLOW_ACTION_QUEUE;
1275                         break;
1276                 case RTE_FLOW_ACTION_TYPE_RSS:
1277                         detected_actions |= MLX5_FLOW_ACTION_RSS;
1278                         break;
1279                 case RTE_FLOW_ACTION_TYPE_COUNT:
1280 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_V42
1281                         size += sizeof(struct ibv_flow_spec_counter_action);
1282 #endif
1283                         detected_actions |= MLX5_FLOW_ACTION_COUNT;
1284                         break;
1285                 default:
1286                         break;
1287                 }
1288         }
1289         *action_flags = detected_actions;
1290         return size;
1291 }
1292
1293 /**
1294  * Calculate the size in bytes needed for the item part of the verbs
1295  * flow. In addition, return bit-fields with all the detected items, in
1296  * order to avoid another iteration over the items.
1297  *
1298  * @param[in] items
1299  *   Pointer to the list of items.
1300  * @param[in, out] item_flags
1301  *   Pointer to the detected items.
1302  *
1303  * @return
1304  *   The size of the memory needed for all items.
1305  */
1306 static int
1307 flow_verbs_get_items_and_size(const struct rte_flow_item items[],
1308                               uint64_t *item_flags)
1309 {
1310         int size = 0;
1311         uint64_t detected_items = 0;
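        /*
         * A tunnel layer already present in *item_flags means the items
         * below describe inner (encapsulated) headers.
         */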
1312         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
1313
1314         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1315                 switch (items->type) {
1316                 case RTE_FLOW_ITEM_TYPE_VOID:
1317                         break;
1318                 case RTE_FLOW_ITEM_TYPE_ETH:
1319                         size += sizeof(struct ibv_flow_spec_eth);
1320                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1321                                                    MLX5_FLOW_LAYER_OUTER_L2;
1322                         break;
1323                 case RTE_FLOW_ITEM_TYPE_VLAN:
1324                         size += sizeof(struct ibv_flow_spec_eth);
1325                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1326                                                    MLX5_FLOW_LAYER_OUTER_VLAN;
1327                         break;
1328                 case RTE_FLOW_ITEM_TYPE_IPV4:
1329                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1330                         detected_items |= tunnel ?
1331                                           MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1332                                           MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1333                         break;
1334                 case RTE_FLOW_ITEM_TYPE_IPV6:
1335                         size += sizeof(struct ibv_flow_spec_ipv6);
1336                         detected_items |= tunnel ?
1337                                           MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1338                                           MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1339                         break;
1340                 case RTE_FLOW_ITEM_TYPE_UDP:
1341                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1342                         detected_items |= tunnel ?
1343                                           MLX5_FLOW_LAYER_INNER_L4_UDP :
1344                                           MLX5_FLOW_LAYER_OUTER_L4_UDP;
1345                         break;
1346                 case RTE_FLOW_ITEM_TYPE_TCP:
1347                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1348                         detected_items |= tunnel ?
1349                                           MLX5_FLOW_LAYER_INNER_L4_TCP :
1350                                           MLX5_FLOW_LAYER_OUTER_L4_TCP;
1351                         break;
1352                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1353                         size += sizeof(struct ibv_flow_spec_tunnel);
1354                         detected_items |= MLX5_FLOW_LAYER_VXLAN;
1355                         break;
1356                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1357                         size += sizeof(struct ibv_flow_spec_tunnel);
1358                         detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
1359                         break;
1360 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1361                 case RTE_FLOW_ITEM_TYPE_GRE:
1362                         size += sizeof(struct ibv_flow_spec_gre);
1363                         detected_items |= MLX5_FLOW_LAYER_GRE;
1364                         break;
1365                 case RTE_FLOW_ITEM_TYPE_MPLS:
1366                         size += sizeof(struct ibv_flow_spec_mpls);
1367                         detected_items |= MLX5_FLOW_LAYER_MPLS;
1368                         break;
1369 #else
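                /*
                 * Without MPLS support in the Verbs headers there is no
                 * dedicated GRE spec; GRE is then matched with the generic
                 * tunnel spec and layer flag.
                 */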
1370                 case RTE_FLOW_ITEM_TYPE_GRE:
1371                         size += sizeof(struct ibv_flow_spec_tunnel);
1372                         detected_items |= MLX5_FLOW_LAYER_TUNNEL;
1373                         break;
1374 #endif
1375                 default:
1376                         break;
1377                 }
1378         }
1379         *item_flags = detected_items;
1380         return size;
1381 }
1382
1383 /**
1384  * Internal preparation function. Allocate mlx5_flow with the required size.
1385  * The required size is calculated based on the actions and items. This function
1386  * also returns the detected actions and items for later use.
1387  *
1388  * @param[in] attr
1389  *   Pointer to the flow attributes.
1390  * @param[in] items
1391  *   Pointer to the list of items.
1392  * @param[in] actions
1393  *   Pointer to the list of actions.
1394  * @param[out] item_flags
1395  *   Pointer to bit mask of all items detected.
1396  * @param[out] action_flags
1397  *   Pointer to bit mask of all actions detected.
1398  * @param[out] error
1399  *   Pointer to the error structure.
1400  *
1401  * @return
1402  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1403  *   is set.
1404  */
1405 static struct mlx5_flow *
1406 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1407                    const struct rte_flow_item items[],
1408                    const struct rte_flow_action actions[],
1409                    uint64_t *item_flags,
1410                    uint64_t *action_flags,
1411                    struct rte_flow_error *error)
1412 {
1413         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1414         struct mlx5_flow *flow;
1415
1416         size += flow_verbs_get_actions_and_size(actions, action_flags);
1417         size += flow_verbs_get_items_and_size(items, item_flags);
1418         flow = rte_calloc(__func__, 1, size, 0);
1419         if (!flow) {
1420                 rte_flow_error_set(error, ENOMEM,
1421                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1422                                    "not enough memory to create flow");
1423                 return NULL;
1424         }
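        /*
         * Single allocation layout: the ibv_flow_attr immediately follows
         * the mlx5_flow structure and the spec area follows the attribute,
         * matching the size computed above.
         */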
1425         flow->verbs.attr = (void *)(flow + 1);
1426         flow->verbs.specs =
1427                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1428         return flow;
1429 }
1430
1431 /**
1432  * Fill the flow with verbs specs.
1433  *
1434  * @param[in] dev
1435  *   Pointer to Ethernet device.
1436  * @param[in, out] dev_flow
1437  *   Pointer to the mlx5 flow.
1438  * @param[in] attr
1439  *   Pointer to the flow attributes.
1440  * @param[in] items
1441  *   Pointer to the list of items.
1442  * @param[in] actions
1443  *   Pointer to the list of actions.
1444  * @param[out] error
1445  *   Pointer to the error structure.
1446  *
1447  * @return
1448  *   0 on success, a negative errno value otherwise and rte_errno is set.
1449  */
1450 static int
1451 flow_verbs_translate(struct rte_eth_dev *dev,
1452                      struct mlx5_flow *dev_flow,
1453                      const struct rte_flow_attr *attr,
1454                      const struct rte_flow_item items[],
1455                      const struct rte_flow_action actions[],
1456                      struct rte_flow_error *error)
1457 {
1458         uint64_t action_flags = 0;
1459         uint64_t item_flags = 0;
1460         uint64_t priority = attr->priority;
1461         struct priv *priv = dev->data->dev_private;
1462
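        /*
         * A reserved priority value presumably means no explicit priority
         * was requested; fall back to the lowest configured flow priority.
         */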
1463         if (priority == MLX5_FLOW_PRIO_RSVD)
1464                 priority = priv->config.flow_prio - 1;
1465         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1466                 int ret;
1467                 switch (actions->type) {
1468                 case RTE_FLOW_ACTION_TYPE_VOID:
1469                         break;
1470                 case RTE_FLOW_ACTION_TYPE_FLAG:
1471                         flow_verbs_translate_action_flag(actions,
1472                                                          &action_flags,
1473                                                          dev_flow);
1474                         break;
1475                 case RTE_FLOW_ACTION_TYPE_MARK:
1476                         flow_verbs_translate_action_mark(actions,
1477                                                          &action_flags,
1478                                                          dev_flow);
1479                         break;
1480                 case RTE_FLOW_ACTION_TYPE_DROP:
1481                         flow_verbs_translate_action_drop(&action_flags,
1482                                                          dev_flow);
1483                         break;
1484                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1485                         flow_verbs_translate_action_queue(actions,
1486                                                           &action_flags,
1487                                                           dev_flow);
1488                         break;
1489                 case RTE_FLOW_ACTION_TYPE_RSS:
1490                         flow_verbs_translate_action_rss(actions,
1491                                                         &action_flags,
1492                                                         dev_flow);
1493                         break;
1494                 case RTE_FLOW_ACTION_TYPE_COUNT:
1495                         ret = flow_verbs_translate_action_count(dev,
1496                                                                 actions,
1497                                                                 &action_flags,
1498                                                                 dev_flow,
1499                                                                 error);
1500                         if (ret < 0)
1501                                 return ret;
1502                         break;
1503                 default:
1504                         return rte_flow_error_set(error, ENOTSUP,
1505                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1506                                                   actions,
1507                                                   "action not supported");
1508                 }
1509         }
1510         dev_flow->flow->actions |= action_flags;
1511         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1512                 switch (items->type) {
1513                 case RTE_FLOW_ITEM_TYPE_VOID:
1514                         break;
1515                 case RTE_FLOW_ITEM_TYPE_ETH:
1516                         flow_verbs_translate_item_eth(items, &item_flags,
1517                                                       dev_flow);
1518                         break;
1519                 case RTE_FLOW_ITEM_TYPE_VLAN:
1520                         flow_verbs_translate_item_vlan(items, &item_flags,
1521                                                        dev_flow);
1522                         break;
1523                 case RTE_FLOW_ITEM_TYPE_IPV4:
1524                         flow_verbs_translate_item_ipv4(items, &item_flags,
1525                                                        dev_flow);
1526                         break;
1527                 case RTE_FLOW_ITEM_TYPE_IPV6:
1528                         flow_verbs_translate_item_ipv6(items, &item_flags,
1529                                                        dev_flow);
1530                         break;
1531                 case RTE_FLOW_ITEM_TYPE_UDP:
1532                         flow_verbs_translate_item_udp(items, &item_flags,
1533                                                       dev_flow);
1534                         break;
1535                 case RTE_FLOW_ITEM_TYPE_TCP:
1536                         flow_verbs_translate_item_tcp(items, &item_flags,
1537                                                       dev_flow);
1538                         break;
1539                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1540                         flow_verbs_translate_item_vxlan(items, &item_flags,
1541                                                         dev_flow);
1542                         break;
1543                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1544                         flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
1545                                                             dev_flow);
1546                         break;
1547                 case RTE_FLOW_ITEM_TYPE_GRE:
1548                         flow_verbs_translate_item_gre(items, &item_flags,
1549                                                       dev_flow);
1550                         break;
1551                 case RTE_FLOW_ITEM_TYPE_MPLS:
1552                         flow_verbs_translate_item_mpls(items, &item_flags,
1553                                                        dev_flow);
1554                         break;
1555                 default:
1556                         return rte_flow_error_set(error, ENOTSUP,
1557                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1558                                                   NULL,
1559                                                   "item not supported");
1560                 }
1561         }
1562         dev_flow->verbs.attr->priority =
1563                 mlx5_flow_adjust_priority(dev, priority,
1564                                           dev_flow->verbs.attr->priority);
1565         return 0;
1566 }
1567
1568 /**
1569  * Remove the flow from the NIC but keep it in memory.
1570  *
1571  * @param[in] dev
1572  *   Pointer to the Ethernet device structure.
1573  * @param[in, out] flow
1574  *   Pointer to flow structure.
1575  */
1576 static void
1577 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1578 {
1579         struct mlx5_flow_verbs *verbs;
1580         struct mlx5_flow *dev_flow;
1581
1582         if (!flow)
1583                 return;
1584         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1585                 verbs = &dev_flow->verbs;
1586                 if (verbs->flow) {
1587                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1588                         verbs->flow = NULL;
1589                 }
1590                 if (verbs->hrxq) {
1591                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1592                                 mlx5_hrxq_drop_release(dev);
1593                         else
1594                                 mlx5_hrxq_release(dev, verbs->hrxq);
1595                         verbs->hrxq = NULL;
1596                 }
1597         }
1598         if (flow->counter) {
1599                 flow_verbs_counter_release(flow->counter);
1600                 flow->counter = NULL;
1601         }
1602 }
1603
1604 /**
1605  * Remove the flow from the NIC and the memory.
1606  *
1607  * @param[in] dev
1608  *   Pointer to the Ethernet device structure.
1609  * @param[in, out] flow
1610  *   Pointer to flow structure.
1611  */
1612 static void
1613 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1614 {
1615         struct mlx5_flow *dev_flow;
1616
1617         if (!flow)
1618                 return;
1619         flow_verbs_remove(dev, flow);
1620         while (!LIST_EMPTY(&flow->dev_flows)) {
1621                 dev_flow = LIST_FIRST(&flow->dev_flows);
1622                 LIST_REMOVE(dev_flow, next);
1623                 rte_free(dev_flow);
1624         }
1625 }
1626
1627 /**
1628  * Apply the flow to the NIC.
1629  *
1630  * @param[in] dev
1631  *   Pointer to the Ethernet device structure.
1632  * @param[in, out] flow
1633  *   Pointer to flow structure.
1634  * @param[out] error
1635  *   Pointer to error structure.
1636  *
1637  * @return
1638  *   0 on success, a negative errno value otherwise and rte_errno is set.
1639  */
1640 static int
1641 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1642                  struct rte_flow_error *error)
1643 {
1644         struct mlx5_flow_verbs *verbs;
1645         struct mlx5_flow *dev_flow;
1646         int err;
1647
1648         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1649                 verbs = &dev_flow->verbs;
1650                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1651                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1652                         if (!verbs->hrxq) {
1653                                 rte_flow_error_set
1654                                         (error, errno,
1655                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1656                                          "cannot get drop hash queue");
1657                                 goto error;
1658                         }
1659                 } else {
1660                         struct mlx5_hrxq *hrxq;
1661
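                        /*
                         * Reuse an existing hash Rx queue matching the RSS
                         * key, hash fields and queue list, or create a new
                         * one when none matches.
                         */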
1662                         hrxq = mlx5_hrxq_get(dev, flow->key,
1663                                              MLX5_RSS_HASH_KEY_LEN,
1664                                              verbs->hash_fields,
1665                                              (*flow->queue),
1666                                              flow->rss.queue_num);
1667                         if (!hrxq)
1668                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1669                                                      MLX5_RSS_HASH_KEY_LEN,
1670                                                      verbs->hash_fields,
1671                                                      (*flow->queue),
1672                                                      flow->rss.queue_num,
1673                                                      !!(flow->layers &
1674                                                       MLX5_FLOW_LAYER_TUNNEL));
1675                         if (!hrxq) {
1676                                 rte_flow_error_set
1677                                         (error, rte_errno,
1678                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1679                                          "cannot get hash queue");
1680                                 goto error;
1681                         }
1682                         verbs->hrxq = hrxq;
1683                 }
1684                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1685                                                      verbs->attr);
1686                 if (!verbs->flow) {
1687                         rte_flow_error_set(error, errno,
1688                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1689                                            NULL,
1690                                            "hardware refuses to create flow");
1691                         goto error;
1692                 }
1693         }
1694         return 0;
1695 error:
1696         err = rte_errno; /* Save rte_errno before cleanup. */
1697         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1698                 verbs = &dev_flow->verbs;
1699                 if (verbs->hrxq) {
1700                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1701                                 mlx5_hrxq_drop_release(dev);
1702                         else
1703                                 mlx5_hrxq_release(dev, verbs->hrxq);
1704                         verbs->hrxq = NULL;
1705                 }
1706         }
1707         rte_errno = err; /* Restore rte_errno. */
1708         return -rte_errno;
1709 }
1710
1711 /**
1712  * Query a flow.
1713  *
1714  * @see rte_flow_query()
1715  * @see rte_flow_ops
1716  */
1717 static int
1718 flow_verbs_query(struct rte_eth_dev *dev,
1719                  struct rte_flow *flow,
1720                  const struct rte_flow_action *actions,
1721                  void *data,
1722                  struct rte_flow_error *error)
1723 {
1724         int ret = -EINVAL;
1725
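        /*
         * -EINVAL is kept as the return value when the action list holds no
         * COUNT action to report on.
         */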
1726         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1727                 switch (actions->type) {
1728                 case RTE_FLOW_ACTION_TYPE_VOID:
1729                         break;
1730                 case RTE_FLOW_ACTION_TYPE_COUNT:
1731                         ret = flow_verbs_counter_query(dev, flow, data, error);
1732                         break;
1733                 default:
1734                         return rte_flow_error_set(error, ENOTSUP,
1735                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1736                                                   actions,
1737                                                   "action not supported");
1738                 }
1739         }
1740         return ret;
1741 }
1742
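/*
 * Verbs flow engine callbacks registered with the generic mlx5 flow layer,
 * which dispatches to them when the Verbs path is in use.
 */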
1743 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1744         .validate = flow_verbs_validate,
1745         .prepare = flow_verbs_prepare,
1746         .translate = flow_verbs_translate,
1747         .apply = flow_verbs_apply,
1748         .remove = flow_verbs_remove,
1749         .destroy = flow_verbs_destroy,
1750         .query = flow_verbs_query,
1751 };