net: add rte prefix to IP defines
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_glue.h"
33 #include "mlx5_prm.h"
34 #include "mlx5_rxtx.h"
35
/*
 * Evaluate to IBV_FLOW_SPEC_INNER when @p item_flags shares at least one bit
 * with MLX5_FLOW_LAYER_TUNNEL, and to 0 otherwise. The result is OR-ed into
 * the type field of the Verbs specifications built in this file.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? IBV_FLOW_SPEC_INNER : 0)
38
39 /**
40  * Create Verbs flow counter with Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object, contains the counter id,
46  *   handle of created Verbs flow counter is returned
47  *   in cs field (if counters are supported).
48  *
49  * @return
50  *   0 On success else a negative errno value is returned
51  *   and rte_errno is set.
52  */
53 static int
54 flow_verbs_counter_create(struct rte_eth_dev *dev,
55                           struct mlx5_flow_counter *counter)
56 {
57 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
58         struct mlx5_priv *priv = dev->data->dev_private;
59         struct ibv_context *ctx = priv->sh->ctx;
60         struct ibv_counter_set_init_attr init = {
61                          .counter_set_id = counter->id};
62
63         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
64         if (!counter->cs) {
65                 rte_errno = ENOTSUP;
66                 return -ENOTSUP;
67         }
68         return 0;
69 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
70         struct mlx5_priv *priv = dev->data->dev_private;
71         struct ibv_context *ctx = priv->sh->ctx;
72         struct ibv_counters_init_attr init = {0};
73         struct ibv_counter_attach_attr attach;
74         int ret;
75
76         memset(&attach, 0, sizeof(attach));
77         counter->cs = mlx5_glue->create_counters(ctx, &init);
78         if (!counter->cs) {
79                 rte_errno = ENOTSUP;
80                 return -ENOTSUP;
81         }
82         attach.counter_desc = IBV_COUNTER_PACKETS;
83         attach.index = 0;
84         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
85         if (!ret) {
86                 attach.counter_desc = IBV_COUNTER_BYTES;
87                 attach.index = 1;
88                 ret = mlx5_glue->attach_counters
89                                         (counter->cs, &attach, NULL);
90         }
91         if (ret) {
92                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
93                 counter->cs = NULL;
94                 rte_errno = ret;
95                 return -ret;
96         }
97         return 0;
98 #else
99         (void)dev;
100         (void)counter;
101         rte_errno = ENOTSUP;
102         return -ENOTSUP;
103 #endif
104 }
105
106 /**
107  * Get a flow counter.
108  *
109  * @param[in] dev
110  *   Pointer to the Ethernet device structure.
111  * @param[in] shared
112  *   Indicate if this counter is shared with other flows.
113  * @param[in] id
114  *   Counter identifier.
115  *
116  * @return
117  *   A pointer to the counter, NULL otherwise and rte_errno is set.
118  */
119 static struct mlx5_flow_counter *
120 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
121 {
122         struct mlx5_priv *priv = dev->data->dev_private;
123         struct mlx5_flow_counter *cnt;
124         int ret;
125
126         if (shared) {
127                 LIST_FOREACH(cnt, &priv->flow_counters, next) {
128                         if (cnt->shared && cnt->id == id) {
129                                 cnt->ref_cnt++;
130                                 return cnt;
131                         }
132                 }
133         }
134         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
135         if (!cnt) {
136                 rte_errno = ENOMEM;
137                 return NULL;
138         }
139         cnt->id = id;
140         cnt->shared = shared;
141         cnt->ref_cnt = 1;
142         cnt->hits = 0;
143         cnt->bytes = 0;
144         /* Create counter with Verbs. */
145         ret = flow_verbs_counter_create(dev, cnt);
146         if (!ret) {
147                 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
148                 return cnt;
149         }
150         /* Some error occurred in Verbs library. */
151         rte_free(cnt);
152         rte_errno = -ret;
153         return NULL;
154 }
155
156 /**
157  * Release a flow counter.
158  *
159  * @param[in] counter
160  *   Pointer to the counter handler.
161  */
162 static void
163 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
164 {
165         if (--counter->ref_cnt == 0) {
166 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
167                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
168 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
169                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
170 #endif
171                 LIST_REMOVE(counter, next);
172                 rte_free(counter);
173         }
174 }
175
176 /**
177  * Query a flow counter via Verbs library call.
178  *
179  * @see rte_flow_query()
180  * @see rte_flow_ops
181  */
182 static int
183 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
184                          struct rte_flow *flow, void *data,
185                          struct rte_flow_error *error)
186 {
187 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
188         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
189         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
190                 struct rte_flow_query_count *qc = data;
191                 uint64_t counters[2] = {0, 0};
192 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
193                 struct ibv_query_counter_set_attr query_cs_attr = {
194                         .cs = flow->counter->cs,
195                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
196                 };
197                 struct ibv_counter_set_data query_out = {
198                         .out = counters,
199                         .outlen = 2 * sizeof(uint64_t),
200                 };
201                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
202                                                        &query_out);
203 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
204                 int err = mlx5_glue->query_counters
205                                (flow->counter->cs, counters,
206                                 RTE_DIM(counters),
207                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
208 #endif
209                 if (err)
210                         return rte_flow_error_set
211                                 (error, err,
212                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
213                                  NULL,
214                                  "cannot read counter");
215                 qc->hits_set = 1;
216                 qc->bytes_set = 1;
217                 qc->hits = counters[0] - flow->counter->hits;
218                 qc->bytes = counters[1] - flow->counter->bytes;
219                 if (qc->reset) {
220                         flow->counter->hits = counters[0];
221                         flow->counter->bytes = counters[1];
222                 }
223                 return 0;
224         }
225         return rte_flow_error_set(error, EINVAL,
226                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
227                                   NULL,
228                                   "flow does not have counter");
229 #else
230         (void)flow;
231         (void)data;
232         return rte_flow_error_set(error, ENOTSUP,
233                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
234                                   NULL,
235                                   "counters are not available");
236 #endif
237 }
238
239 /**
240  * Add a verbs item specification into @p verbs.
241  *
242  * @param[out] verbs
243  *   Pointer to verbs structure.
244  * @param[in] src
245  *   Create specification.
246  * @param[in] size
247  *   Size in bytes of the specification to copy.
248  */
249 static void
250 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
251 {
252         void *dst;
253
254         if (!verbs)
255                 return;
256         assert(verbs->specs);
257         dst = (void *)(verbs->specs + verbs->size);
258         memcpy(dst, src, size);
259         ++verbs->attr->num_of_specs;
260         verbs->size += size;
261 }
262
263 /**
264  * Convert the @p item into a Verbs specification. This function assumes that
265  * the input is valid and that there is space to insert the requested item
266  * into the flow.
267  *
268  * @param[in, out] dev_flow
269  *   Pointer to dev_flow structure.
270  * @param[in] item
271  *   Item specification.
272  * @param[in] item_flags
273  *   Parsed item flags.
274  */
275 static void
276 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
277                               const struct rte_flow_item *item,
278                               uint64_t item_flags)
279 {
280         const struct rte_flow_item_eth *spec = item->spec;
281         const struct rte_flow_item_eth *mask = item->mask;
282         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
283         struct ibv_flow_spec_eth eth = {
284                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
285                 .size = size,
286         };
287
288         if (!mask)
289                 mask = &rte_flow_item_eth_mask;
290         if (spec) {
291                 unsigned int i;
292
293                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
294                         RTE_ETHER_ADDR_LEN);
295                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
296                         RTE_ETHER_ADDR_LEN);
297                 eth.val.ether_type = spec->type;
298                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
299                         RTE_ETHER_ADDR_LEN);
300                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
301                         RTE_ETHER_ADDR_LEN);
302                 eth.mask.ether_type = mask->type;
303                 /* Remove unwanted bits from values. */
304                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
305                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
306                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
307                 }
308                 eth.val.ether_type &= eth.mask.ether_type;
309         }
310         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
311 }
312
313 /**
314  * Update the VLAN tag in the Verbs Ethernet specification.
315  * This function assumes that the input is valid and there is space to add
316  * the requested item.
317  *
318  * @param[in, out] attr
319  *   Pointer to Verbs attributes structure.
320  * @param[in] eth
321  *   Verbs structure containing the VLAN information to copy.
322  */
323 static void
324 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
325                             struct ibv_flow_spec_eth *eth)
326 {
327         unsigned int i;
328         const enum ibv_flow_spec_type search = eth->type;
329         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
330                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
331
332         for (i = 0; i != attr->num_of_specs; ++i) {
333                 if (hdr->type == search) {
334                         struct ibv_flow_spec_eth *e =
335                                 (struct ibv_flow_spec_eth *)hdr;
336
337                         e->val.vlan_tag = eth->val.vlan_tag;
338                         e->mask.vlan_tag = eth->mask.vlan_tag;
339                         e->val.ether_type = eth->val.ether_type;
340                         e->mask.ether_type = eth->mask.ether_type;
341                         break;
342                 }
343                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
344         }
345 }
346
347 /**
348  * Convert the @p item into a Verbs specification. This function assumes that
349  * the input is valid and that there is space to insert the requested item
350  * into the flow.
351  *
352  * @param[in, out] dev_flow
353  *   Pointer to dev_flow structure.
354  * @param[in] item
355  *   Item specification.
356  * @param[in] item_flags
357  *   Parsed item flags.
358  */
359 static void
360 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
361                                const struct rte_flow_item *item,
362                                uint64_t item_flags)
363 {
364         const struct rte_flow_item_vlan *spec = item->spec;
365         const struct rte_flow_item_vlan *mask = item->mask;
366         unsigned int size = sizeof(struct ibv_flow_spec_eth);
367         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
368         struct ibv_flow_spec_eth eth = {
369                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
370                 .size = size,
371         };
372         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
373                                       MLX5_FLOW_LAYER_OUTER_L2;
374
375         if (!mask)
376                 mask = &rte_flow_item_vlan_mask;
377         if (spec) {
378                 eth.val.vlan_tag = spec->tci;
379                 eth.mask.vlan_tag = mask->tci;
380                 eth.val.vlan_tag &= eth.mask.vlan_tag;
381                 eth.val.ether_type = spec->inner_type;
382                 eth.mask.ether_type = mask->inner_type;
383                 eth.val.ether_type &= eth.mask.ether_type;
384         }
385         if (!(item_flags & l2m))
386                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
387         else
388                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
389 }
390
391 /**
392  * Convert the @p item into a Verbs specification. This function assumes that
393  * the input is valid and that there is space to insert the requested item
394  * into the flow.
395  *
396  * @param[in, out] dev_flow
397  *   Pointer to dev_flow structure.
398  * @param[in] item
399  *   Item specification.
400  * @param[in] item_flags
401  *   Parsed item flags.
402  */
403 static void
404 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
405                                const struct rte_flow_item *item,
406                                uint64_t item_flags)
407 {
408         const struct rte_flow_item_ipv4 *spec = item->spec;
409         const struct rte_flow_item_ipv4 *mask = item->mask;
410         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
411         struct ibv_flow_spec_ipv4_ext ipv4 = {
412                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
413                 .size = size,
414         };
415
416         if (!mask)
417                 mask = &rte_flow_item_ipv4_mask;
418         if (spec) {
419                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
420                         .src_ip = spec->hdr.src_addr,
421                         .dst_ip = spec->hdr.dst_addr,
422                         .proto = spec->hdr.next_proto_id,
423                         .tos = spec->hdr.type_of_service,
424                 };
425                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
426                         .src_ip = mask->hdr.src_addr,
427                         .dst_ip = mask->hdr.dst_addr,
428                         .proto = mask->hdr.next_proto_id,
429                         .tos = mask->hdr.type_of_service,
430                 };
431                 /* Remove unwanted bits from values. */
432                 ipv4.val.src_ip &= ipv4.mask.src_ip;
433                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
434                 ipv4.val.proto &= ipv4.mask.proto;
435                 ipv4.val.tos &= ipv4.mask.tos;
436         }
437         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
438 }
439
440 /**
441  * Convert the @p item into a Verbs specification. This function assumes that
442  * the input is valid and that there is space to insert the requested item
443  * into the flow.
444  *
445  * @param[in, out] dev_flow
446  *   Pointer to dev_flow structure.
447  * @param[in] item
448  *   Item specification.
449  * @param[in] item_flags
450  *   Parsed item flags.
451  */
452 static void
453 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
454                                const struct rte_flow_item *item,
455                                uint64_t item_flags)
456 {
457         const struct rte_flow_item_ipv6 *spec = item->spec;
458         const struct rte_flow_item_ipv6 *mask = item->mask;
459         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
460         struct ibv_flow_spec_ipv6 ipv6 = {
461                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
462                 .size = size,
463         };
464
465         if (!mask)
466                 mask = &rte_flow_item_ipv6_mask;
467         if (spec) {
468                 unsigned int i;
469                 uint32_t vtc_flow_val;
470                 uint32_t vtc_flow_mask;
471
472                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
473                        RTE_DIM(ipv6.val.src_ip));
474                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
475                        RTE_DIM(ipv6.val.dst_ip));
476                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
477                        RTE_DIM(ipv6.mask.src_ip));
478                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
479                        RTE_DIM(ipv6.mask.dst_ip));
480                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
481                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
482                 ipv6.val.flow_label =
483                         rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
484                                          RTE_IPV6_HDR_FL_SHIFT);
485                 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
486                                          RTE_IPV6_HDR_TC_SHIFT;
487                 ipv6.val.next_hdr = spec->hdr.proto;
488                 ipv6.val.hop_limit = spec->hdr.hop_limits;
489                 ipv6.mask.flow_label =
490                         rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
491                                          RTE_IPV6_HDR_FL_SHIFT);
492                 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
493                                           RTE_IPV6_HDR_TC_SHIFT;
494                 ipv6.mask.next_hdr = mask->hdr.proto;
495                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
496                 /* Remove unwanted bits from values. */
497                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
498                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
499                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
500                 }
501                 ipv6.val.flow_label &= ipv6.mask.flow_label;
502                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
503                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
504                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
505         }
506         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
507 }
508
509 /**
510  * Convert the @p item into a Verbs specification. This function assumes that
511  * the input is valid and that there is space to insert the requested item
512  * into the flow.
513  *
514  * @param[in, out] dev_flow
515  *   Pointer to dev_flow structure.
516  * @param[in] item
517  *   Item specification.
518  * @param[in] item_flags
519  *   Parsed item flags.
520  */
521 static void
522 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
523                               const struct rte_flow_item *item,
524                               uint64_t item_flags __rte_unused)
525 {
526         const struct rte_flow_item_tcp *spec = item->spec;
527         const struct rte_flow_item_tcp *mask = item->mask;
528         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
529         struct ibv_flow_spec_tcp_udp tcp = {
530                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
531                 .size = size,
532         };
533
534         if (!mask)
535                 mask = &rte_flow_item_tcp_mask;
536         if (spec) {
537                 tcp.val.dst_port = spec->hdr.dst_port;
538                 tcp.val.src_port = spec->hdr.src_port;
539                 tcp.mask.dst_port = mask->hdr.dst_port;
540                 tcp.mask.src_port = mask->hdr.src_port;
541                 /* Remove unwanted bits from values. */
542                 tcp.val.src_port &= tcp.mask.src_port;
543                 tcp.val.dst_port &= tcp.mask.dst_port;
544         }
545         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
546 }
547
548 /**
549  * Convert the @p item into a Verbs specification. This function assumes that
550  * the input is valid and that there is space to insert the requested item
551  * into the flow.
552  *
553  * @param[in, out] dev_flow
554  *   Pointer to dev_flow structure.
555  * @param[in] item
556  *   Item specification.
557  * @param[in] item_flags
558  *   Parsed item flags.
559  */
560 static void
561 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
562                               const struct rte_flow_item *item,
563                               uint64_t item_flags __rte_unused)
564 {
565         const struct rte_flow_item_udp *spec = item->spec;
566         const struct rte_flow_item_udp *mask = item->mask;
567         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
568         struct ibv_flow_spec_tcp_udp udp = {
569                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
570                 .size = size,
571         };
572
573         if (!mask)
574                 mask = &rte_flow_item_udp_mask;
575         if (spec) {
576                 udp.val.dst_port = spec->hdr.dst_port;
577                 udp.val.src_port = spec->hdr.src_port;
578                 udp.mask.dst_port = mask->hdr.dst_port;
579                 udp.mask.src_port = mask->hdr.src_port;
580                 /* Remove unwanted bits from values. */
581                 udp.val.src_port &= udp.mask.src_port;
582                 udp.val.dst_port &= udp.mask.dst_port;
583         }
584         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
585 }
586
587 /**
588  * Convert the @p item into a Verbs specification. This function assumes that
589  * the input is valid and that there is space to insert the requested item
590  * into the flow.
591  *
592  * @param[in, out] dev_flow
593  *   Pointer to dev_flow structure.
594  * @param[in] item
595  *   Item specification.
596  * @param[in] item_flags
597  *   Parsed item flags.
598  */
599 static void
600 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
601                                 const struct rte_flow_item *item,
602                                 uint64_t item_flags __rte_unused)
603 {
604         const struct rte_flow_item_vxlan *spec = item->spec;
605         const struct rte_flow_item_vxlan *mask = item->mask;
606         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
607         struct ibv_flow_spec_tunnel vxlan = {
608                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
609                 .size = size,
610         };
611         union vni {
612                 uint32_t vlan_id;
613                 uint8_t vni[4];
614         } id = { .vlan_id = 0, };
615
616         if (!mask)
617                 mask = &rte_flow_item_vxlan_mask;
618         if (spec) {
619                 memcpy(&id.vni[1], spec->vni, 3);
620                 vxlan.val.tunnel_id = id.vlan_id;
621                 memcpy(&id.vni[1], mask->vni, 3);
622                 vxlan.mask.tunnel_id = id.vlan_id;
623                 /* Remove unwanted bits from values. */
624                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
625         }
626         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
627 }
628
629 /**
630  * Convert the @p item into a Verbs specification. This function assumes that
631  * the input is valid and that there is space to insert the requested item
632  * into the flow.
633  *
634  * @param[in, out] dev_flow
635  *   Pointer to dev_flow structure.
636  * @param[in] item
637  *   Item specification.
638  * @param[in] item_flags
639  *   Parsed item flags.
640  */
641 static void
642 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
643                                     const struct rte_flow_item *item,
644                                     uint64_t item_flags __rte_unused)
645 {
646         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
647         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
648         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
649         struct ibv_flow_spec_tunnel vxlan_gpe = {
650                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
651                 .size = size,
652         };
653         union vni {
654                 uint32_t vlan_id;
655                 uint8_t vni[4];
656         } id = { .vlan_id = 0, };
657
658         if (!mask)
659                 mask = &rte_flow_item_vxlan_gpe_mask;
660         if (spec) {
661                 memcpy(&id.vni[1], spec->vni, 3);
662                 vxlan_gpe.val.tunnel_id = id.vlan_id;
663                 memcpy(&id.vni[1], mask->vni, 3);
664                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
665                 /* Remove unwanted bits from values. */
666                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
667         }
668         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
669 }
670
671 /**
672  * Update the protocol in Verbs IPv4/IPv6 spec.
673  *
674  * @param[in, out] attr
675  *   Pointer to Verbs attributes structure.
676  * @param[in] search
677  *   Specification type to search in order to update the IP protocol.
678  * @param[in] protocol
679  *   Protocol value to set if none is present in the specification.
680  */
681 static void
682 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
683                                        enum ibv_flow_spec_type search,
684                                        uint8_t protocol)
685 {
686         unsigned int i;
687         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
688                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
689
690         if (!attr)
691                 return;
692         for (i = 0; i != attr->num_of_specs; ++i) {
693                 if (hdr->type == search) {
694                         union {
695                                 struct ibv_flow_spec_ipv4_ext *ipv4;
696                                 struct ibv_flow_spec_ipv6 *ipv6;
697                         } ip;
698
699                         switch (search) {
700                         case IBV_FLOW_SPEC_IPV4_EXT:
701                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
702                                 if (!ip.ipv4->val.proto) {
703                                         ip.ipv4->val.proto = protocol;
704                                         ip.ipv4->mask.proto = 0xff;
705                                 }
706                                 break;
707                         case IBV_FLOW_SPEC_IPV6:
708                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
709                                 if (!ip.ipv6->val.next_hdr) {
710                                         ip.ipv6->val.next_hdr = protocol;
711                                         ip.ipv6->mask.next_hdr = 0xff;
712                                 }
713                                 break;
714                         default:
715                                 break;
716                         }
717                         break;
718                 }
719                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
720         }
721 }
722
723 /**
724  * Convert the @p item into a Verbs specification. This function assumes that
725  * the input is valid and that there is space to insert the requested item
726  * into the flow.
727  *
728  * @param[in, out] dev_flow
729  *   Pointer to dev_flow structure.
730  * @param[in] item
731  *   Item specification.
732  * @param[in] item_flags
733  *   Parsed item flags.
734  */
735 static void
736 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
737                               const struct rte_flow_item *item __rte_unused,
738                               uint64_t item_flags)
739 {
740         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
741 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
742         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
743         struct ibv_flow_spec_tunnel tunnel = {
744                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
745                 .size = size,
746         };
747 #else
748         const struct rte_flow_item_gre *spec = item->spec;
749         const struct rte_flow_item_gre *mask = item->mask;
750         unsigned int size = sizeof(struct ibv_flow_spec_gre);
751         struct ibv_flow_spec_gre tunnel = {
752                 .type = IBV_FLOW_SPEC_GRE,
753                 .size = size,
754         };
755
756         if (!mask)
757                 mask = &rte_flow_item_gre_mask;
758         if (spec) {
759                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
760                 tunnel.val.protocol = spec->protocol;
761                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
762                 tunnel.mask.protocol = mask->protocol;
763                 /* Remove unwanted bits from values. */
764                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
765                 tunnel.val.protocol &= tunnel.mask.protocol;
766                 tunnel.val.key &= tunnel.mask.key;
767         }
768 #endif
769         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
770                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
771                                                        IBV_FLOW_SPEC_IPV4_EXT,
772                                                        IPPROTO_GRE);
773         else
774                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
775                                                        IBV_FLOW_SPEC_IPV6,
776                                                        IPPROTO_GRE);
777         flow_verbs_spec_add(verbs, &tunnel, size);
778 }
779
780 /**
781  * Convert the @p action into a Verbs specification. This function assumes that
782  * the input is valid and that there is space to insert the requested action
783  * into the flow. This function also return the action that was added.
784  *
785  * @param[in, out] dev_flow
786  *   Pointer to dev_flow structure.
787  * @param[in] item
788  *   Item specification.
789  * @param[in] item_flags
790  *   Parsed item flags.
791  */
792 static void
793 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
794                                const struct rte_flow_item *item __rte_unused,
795                                uint64_t item_flags __rte_unused)
796 {
797 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
798         const struct rte_flow_item_mpls *spec = item->spec;
799         const struct rte_flow_item_mpls *mask = item->mask;
800         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
801         struct ibv_flow_spec_mpls mpls = {
802                 .type = IBV_FLOW_SPEC_MPLS,
803                 .size = size,
804         };
805
806         if (!mask)
807                 mask = &rte_flow_item_mpls_mask;
808         if (spec) {
809                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
810                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
811                 /* Remove unwanted bits from values.  */
812                 mpls.val.label &= mpls.mask.label;
813         }
814         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
815 #endif
816 }
817
818 /**
819  * Convert the @p action into a Verbs specification. This function assumes that
820  * the input is valid and that there is space to insert the requested action
821  * into the flow.
822  *
823  * @param[in] dev_flow
824  *   Pointer to mlx5_flow.
825  * @param[in] action
826  *   Action configuration.
827  */
828 static void
829 flow_verbs_translate_action_drop
830         (struct mlx5_flow *dev_flow,
831          const struct rte_flow_action *action __rte_unused)
832 {
833         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
834         struct ibv_flow_spec_action_drop drop = {
835                         .type = IBV_FLOW_SPEC_ACTION_DROP,
836                         .size = size,
837         };
838
839         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
840 }
841
842 /**
843  * Convert the @p action into a Verbs specification. This function assumes that
844  * the input is valid and that there is space to insert the requested action
845  * into the flow.
846  *
847  * @param[in] dev_flow
848  *   Pointer to mlx5_flow.
849  * @param[in] action
850  *   Action configuration.
851  */
852 static void
853 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
854                                   const struct rte_flow_action *action)
855 {
856         const struct rte_flow_action_queue *queue = action->conf;
857         struct rte_flow *flow = dev_flow->flow;
858
859         if (flow->queue)
860                 (*flow->queue)[0] = queue->index;
861         flow->rss.queue_num = 1;
862 }
863
864 /**
865  * Convert the @p action into a Verbs specification. This function assumes that
866  * the input is valid and that there is space to insert the requested action
867  * into the flow.
868  *
869  * @param[in] action
870  *   Action configuration.
871  * @param[in, out] action_flags
872  *   Pointer to the detected actions.
873  * @param[in] dev_flow
874  *   Pointer to mlx5_flow.
875  */
876 static void
877 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
878                                 const struct rte_flow_action *action)
879 {
880         const struct rte_flow_action_rss *rss = action->conf;
881         const uint8_t *rss_key;
882         struct rte_flow *flow = dev_flow->flow;
883
884         if (flow->queue)
885                 memcpy((*flow->queue), rss->queue,
886                        rss->queue_num * sizeof(uint16_t));
887         flow->rss.queue_num = rss->queue_num;
888         /* NULL RSS key indicates default RSS key. */
889         rss_key = !rss->key ? rss_hash_default_key : rss->key;
890         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
891         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
892         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
893         flow->rss.level = rss->level;
894 }
895
896 /**
897  * Convert the @p action into a Verbs specification. This function assumes that
898  * the input is valid and that there is space to insert the requested action
899  * into the flow.
900  *
901  * @param[in] dev_flow
902  *   Pointer to mlx5_flow.
903  * @param[in] action
904  *   Action configuration.
905  */
906 static void
907 flow_verbs_translate_action_flag
908         (struct mlx5_flow *dev_flow,
909          const struct rte_flow_action *action __rte_unused)
910 {
911         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
912         struct ibv_flow_spec_action_tag tag = {
913                 .type = IBV_FLOW_SPEC_ACTION_TAG,
914                 .size = size,
915                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
916         };
917
918         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
919 }
920
921 /**
922  * Convert the @p action into a Verbs specification. This function assumes that
923  * the input is valid and that there is space to insert the requested action
924  * into the flow.
925  *
926  * @param[in] dev_flow
927  *   Pointer to mlx5_flow.
928  * @param[in] action
929  *   Action configuration.
930  */
931 static void
932 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
933                                  const struct rte_flow_action *action)
934 {
935         const struct rte_flow_action_mark *mark = action->conf;
936         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
937         struct ibv_flow_spec_action_tag tag = {
938                 .type = IBV_FLOW_SPEC_ACTION_TAG,
939                 .size = size,
940                 .tag_id = mlx5_flow_mark_set(mark->id),
941         };
942
943         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
944 }
945
946 /**
947  * Convert the @p action into a Verbs specification. This function assumes that
948  * the input is valid and that there is space to insert the requested action
949  * into the flow.
950  *
951  * @param[in] dev
952  *   Pointer to the Ethernet device structure.
953  * @param[in] action
954  *   Action configuration.
955  * @param[in] dev_flow
956  *   Pointer to mlx5_flow.
957  * @param[out] error
958  *   Pointer to error structure.
959  *
960  * @return
961  *   0 On success else a negative errno value is returned and rte_errno is set.
962  */
963 static int
964 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
965                                   const struct rte_flow_action *action,
966                                   struct rte_eth_dev *dev,
967                                   struct rte_flow_error *error)
968 {
969         const struct rte_flow_action_count *count = action->conf;
970         struct rte_flow *flow = dev_flow->flow;
971 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
972         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
973         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
974         struct ibv_flow_spec_counter_action counter = {
975                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
976                 .size = size,
977         };
978 #endif
979
980         if (!flow->counter) {
981                 flow->counter = flow_verbs_counter_new(dev, count->shared,
982                                                        count->id);
983                 if (!flow->counter)
984                         return rte_flow_error_set(error, rte_errno,
985                                                   RTE_FLOW_ERROR_TYPE_ACTION,
986                                                   action,
987                                                   "cannot get counter"
988                                                   " context.");
989         }
990 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
991         counter.counter_set_handle = flow->counter->cs->handle;
992         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
993 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
994         counter.counters = flow->counter->cs;
995         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
996 #endif
997         return 0;
998 }
999
1000 /**
1001  * Internal validation function. For validating both actions and items.
1002  *
1003  * @param[in] dev
1004  *   Pointer to the Ethernet device structure.
1005  * @param[in] attr
1006  *   Pointer to the flow attributes.
1007  * @param[in] items
1008  *   Pointer to the list of items.
1009  * @param[in] actions
1010  *   Pointer to the list of actions.
1011  * @param[out] error
1012  *   Pointer to the error structure.
1013  *
1014  * @return
1015  *   0 on success, a negative errno value otherwise and rte_errno is set.
1016  */
1017 static int
1018 flow_verbs_validate(struct rte_eth_dev *dev,
1019                     const struct rte_flow_attr *attr,
1020                     const struct rte_flow_item items[],
1021                     const struct rte_flow_action actions[],
1022                     struct rte_flow_error *error)
1023 {
1024         int ret;
1025         uint64_t action_flags = 0;
1026         uint64_t item_flags = 0;
1027         uint64_t last_item = 0;
1028         uint8_t next_protocol = 0xff;
1029
1030         if (items == NULL)
1031                 return -1;
1032         ret = mlx5_flow_validate_attributes(dev, attr, error);
1033         if (ret < 0)
1034                 return ret;
1035         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1036                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1037                 int ret = 0;
1038
1039                 switch (items->type) {
1040                 case RTE_FLOW_ITEM_TYPE_VOID:
1041                         break;
1042                 case RTE_FLOW_ITEM_TYPE_ETH:
1043                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1044                                                           error);
1045                         if (ret < 0)
1046                                 return ret;
1047                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1048                                              MLX5_FLOW_LAYER_OUTER_L2;
1049                         break;
1050                 case RTE_FLOW_ITEM_TYPE_VLAN:
1051                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1052                                                            error);
1053                         if (ret < 0)
1054                                 return ret;
1055                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1056                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1057                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1058                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1059                         break;
1060                 case RTE_FLOW_ITEM_TYPE_IPV4:
1061                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1062                                                            NULL, error);
1063                         if (ret < 0)
1064                                 return ret;
1065                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1066                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1067                         if (items->mask != NULL &&
1068                             ((const struct rte_flow_item_ipv4 *)
1069                              items->mask)->hdr.next_proto_id) {
1070                                 next_protocol =
1071                                         ((const struct rte_flow_item_ipv4 *)
1072                                          (items->spec))->hdr.next_proto_id;
1073                                 next_protocol &=
1074                                         ((const struct rte_flow_item_ipv4 *)
1075                                          (items->mask))->hdr.next_proto_id;
1076                         } else {
1077                                 /* Reset for inner layer. */
1078                                 next_protocol = 0xff;
1079                         }
1080                         break;
1081                 case RTE_FLOW_ITEM_TYPE_IPV6:
1082                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1083                                                            NULL, error);
1084                         if (ret < 0)
1085                                 return ret;
1086                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1087                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1088                         if (items->mask != NULL &&
1089                             ((const struct rte_flow_item_ipv6 *)
1090                              items->mask)->hdr.proto) {
1091                                 next_protocol =
1092                                         ((const struct rte_flow_item_ipv6 *)
1093                                          items->spec)->hdr.proto;
1094                                 next_protocol &=
1095                                         ((const struct rte_flow_item_ipv6 *)
1096                                          items->mask)->hdr.proto;
1097                         } else {
1098                                 /* Reset for inner layer. */
1099                                 next_protocol = 0xff;
1100                         }
1101                         break;
1102                 case RTE_FLOW_ITEM_TYPE_UDP:
1103                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1104                                                           next_protocol,
1105                                                           error);
1106                         if (ret < 0)
1107                                 return ret;
1108                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1109                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1110                         break;
1111                 case RTE_FLOW_ITEM_TYPE_TCP:
1112                         ret = mlx5_flow_validate_item_tcp
1113                                                 (items, item_flags,
1114                                                  next_protocol,
1115                                                  &rte_flow_item_tcp_mask,
1116                                                  error);
1117                         if (ret < 0)
1118                                 return ret;
1119                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1120                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1121                         break;
1122                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1123                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1124                                                             error);
1125                         if (ret < 0)
1126                                 return ret;
1127                         last_item = MLX5_FLOW_LAYER_VXLAN;
1128                         break;
1129                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1130                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1131                                                                 item_flags,
1132                                                                 dev, error);
1133                         if (ret < 0)
1134                                 return ret;
1135                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1136                         break;
1137                 case RTE_FLOW_ITEM_TYPE_GRE:
1138                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1139                                                           next_protocol, error);
1140                         if (ret < 0)
1141                                 return ret;
1142                         last_item = MLX5_FLOW_LAYER_GRE;
1143                         break;
1144                 case RTE_FLOW_ITEM_TYPE_MPLS:
1145                         ret = mlx5_flow_validate_item_mpls(dev, items,
1146                                                            item_flags,
1147                                                            last_item, error);
1148                         if (ret < 0)
1149                                 return ret;
1150                         last_item = MLX5_FLOW_LAYER_MPLS;
1151                         break;
1152                 default:
1153                         return rte_flow_error_set(error, ENOTSUP,
1154                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1155                                                   NULL, "item not supported");
1156                 }
1157                 item_flags |= last_item;
1158         }
1159         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1160                 switch (actions->type) {
1161                 case RTE_FLOW_ACTION_TYPE_VOID:
1162                         break;
1163                 case RTE_FLOW_ACTION_TYPE_FLAG:
1164                         ret = mlx5_flow_validate_action_flag(action_flags,
1165                                                              attr,
1166                                                              error);
1167                         if (ret < 0)
1168                                 return ret;
1169                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1170                         break;
1171                 case RTE_FLOW_ACTION_TYPE_MARK:
1172                         ret = mlx5_flow_validate_action_mark(actions,
1173                                                              action_flags,
1174                                                              attr,
1175                                                              error);
1176                         if (ret < 0)
1177                                 return ret;
1178                         action_flags |= MLX5_FLOW_ACTION_MARK;
1179                         break;
1180                 case RTE_FLOW_ACTION_TYPE_DROP:
1181                         ret = mlx5_flow_validate_action_drop(action_flags,
1182                                                              attr,
1183                                                              error);
1184                         if (ret < 0)
1185                                 return ret;
1186                         action_flags |= MLX5_FLOW_ACTION_DROP;
1187                         break;
1188                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1189                         ret = mlx5_flow_validate_action_queue(actions,
1190                                                               action_flags, dev,
1191                                                               attr,
1192                                                               error);
1193                         if (ret < 0)
1194                                 return ret;
1195                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1196                         break;
1197                 case RTE_FLOW_ACTION_TYPE_RSS:
1198                         ret = mlx5_flow_validate_action_rss(actions,
1199                                                             action_flags, dev,
1200                                                             attr, item_flags,
1201                                                             error);
1202                         if (ret < 0)
1203                                 return ret;
1204                         action_flags |= MLX5_FLOW_ACTION_RSS;
1205                         break;
1206                 case RTE_FLOW_ACTION_TYPE_COUNT:
1207                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1208                         if (ret < 0)
1209                                 return ret;
1210                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1211                         break;
1212                 default:
1213                         return rte_flow_error_set(error, ENOTSUP,
1214                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1215                                                   actions,
1216                                                   "action not supported");
1217                 }
1218         }
1219         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1220                 return rte_flow_error_set(error, EINVAL,
1221                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1222                                           "no fate action is found");
1223         return 0;
1224 }
1225
1226 /**
1227  * Calculate the required bytes that are needed for the action part of the verbs
1228  * flow.
1229  *
1230  * @param[in] actions
1231  *   Pointer to the list of actions.
1232  *
1233  * @return
1234  *   The size of the memory needed for all actions.
1235  */
1236 static int
1237 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1238 {
1239         int size = 0;
1240
1241         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1242                 switch (actions->type) {
1243                 case RTE_FLOW_ACTION_TYPE_VOID:
1244                         break;
1245                 case RTE_FLOW_ACTION_TYPE_FLAG:
1246                         size += sizeof(struct ibv_flow_spec_action_tag);
1247                         break;
1248                 case RTE_FLOW_ACTION_TYPE_MARK:
1249                         size += sizeof(struct ibv_flow_spec_action_tag);
1250                         break;
1251                 case RTE_FLOW_ACTION_TYPE_DROP:
1252                         size += sizeof(struct ibv_flow_spec_action_drop);
1253                         break;
1254                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1255                         break;
1256                 case RTE_FLOW_ACTION_TYPE_RSS:
1257                         break;
1258                 case RTE_FLOW_ACTION_TYPE_COUNT:
1259 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1260         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1261                         size += sizeof(struct ibv_flow_spec_counter_action);
1262 #endif
1263                         break;
1264                 default:
1265                         break;
1266                 }
1267         }
1268         return size;
1269 }
1270
1271 /**
1272  * Calculate the required bytes that are needed for the item part of the verbs
1273  * flow.
1274  *
1275  * @param[in] items
1276  *   Pointer to the list of items.
1277  *
1278  * @return
1279  *   The size of the memory needed for all items.
1280  */
1281 static int
1282 flow_verbs_get_items_size(const struct rte_flow_item items[])
1283 {
1284         int size = 0;
1285
1286         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1287                 switch (items->type) {
1288                 case RTE_FLOW_ITEM_TYPE_VOID:
1289                         break;
1290                 case RTE_FLOW_ITEM_TYPE_ETH:
1291                         size += sizeof(struct ibv_flow_spec_eth);
1292                         break;
1293                 case RTE_FLOW_ITEM_TYPE_VLAN:
1294                         size += sizeof(struct ibv_flow_spec_eth);
1295                         break;
1296                 case RTE_FLOW_ITEM_TYPE_IPV4:
1297                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1298                         break;
1299                 case RTE_FLOW_ITEM_TYPE_IPV6:
1300                         size += sizeof(struct ibv_flow_spec_ipv6);
1301                         break;
1302                 case RTE_FLOW_ITEM_TYPE_UDP:
1303                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1304                         break;
1305                 case RTE_FLOW_ITEM_TYPE_TCP:
1306                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1307                         break;
1308                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1309                         size += sizeof(struct ibv_flow_spec_tunnel);
1310                         break;
1311                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1312                         size += sizeof(struct ibv_flow_spec_tunnel);
1313                         break;
1314 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1315                 case RTE_FLOW_ITEM_TYPE_GRE:
1316                         size += sizeof(struct ibv_flow_spec_gre);
1317                         break;
1318                 case RTE_FLOW_ITEM_TYPE_MPLS:
1319                         size += sizeof(struct ibv_flow_spec_mpls);
1320                         break;
1321 #else
1322                 case RTE_FLOW_ITEM_TYPE_GRE:
1323                         size += sizeof(struct ibv_flow_spec_tunnel);
1324                         break;
1325 #endif
1326                 default:
1327                         break;
1328                 }
1329         }
1330         return size;
1331 }
1332
1333 /**
1334  * Internal preparation function. Allocate mlx5_flow with the required size.
1335  * The required size is calculate based on the actions and items. This function
1336  * also returns the detected actions and items for later use.
1337  *
1338  * @param[in] attr
1339  *   Pointer to the flow attributes.
1340  * @param[in] items
1341  *   Pointer to the list of items.
1342  * @param[in] actions
1343  *   Pointer to the list of actions.
1344  * @param[out] error
1345  *   Pointer to the error structure.
1346  *
1347  * @return
1348  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1349  *   is set.
1350  */
1351 static struct mlx5_flow *
1352 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1353                    const struct rte_flow_item items[],
1354                    const struct rte_flow_action actions[],
1355                    struct rte_flow_error *error)
1356 {
1357         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1358         struct mlx5_flow *flow;
1359
1360         size += flow_verbs_get_actions_size(actions);
1361         size += flow_verbs_get_items_size(items);
1362         flow = rte_calloc(__func__, 1, size, 0);
1363         if (!flow) {
1364                 rte_flow_error_set(error, ENOMEM,
1365                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1366                                    "not enough memory to create flow");
1367                 return NULL;
1368         }
1369         flow->verbs.attr = (void *)(flow + 1);
1370         flow->verbs.specs =
1371                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1372         return flow;
1373 }
1374
1375 /**
1376  * Fill the flow with verb spec.
1377  *
1378  * @param[in] dev
1379  *   Pointer to Ethernet device.
1380  * @param[in, out] dev_flow
1381  *   Pointer to the mlx5 flow.
1382  * @param[in] attr
1383  *   Pointer to the flow attributes.
1384  * @param[in] items
1385  *   Pointer to the list of items.
1386  * @param[in] actions
1387  *   Pointer to the list of actions.
1388  * @param[out] error
1389  *   Pointer to the error structure.
1390  *
1391  * @return
1392  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1393  */
1394 static int
1395 flow_verbs_translate(struct rte_eth_dev *dev,
1396                      struct mlx5_flow *dev_flow,
1397                      const struct rte_flow_attr *attr,
1398                      const struct rte_flow_item items[],
1399                      const struct rte_flow_action actions[],
1400                      struct rte_flow_error *error)
1401 {
1402         struct rte_flow *flow = dev_flow->flow;
1403         uint64_t item_flags = 0;
1404         uint64_t action_flags = 0;
1405         uint64_t priority = attr->priority;
1406         uint32_t subpriority = 0;
1407         struct mlx5_priv *priv = dev->data->dev_private;
1408
1409         if (priority == MLX5_FLOW_PRIO_RSVD)
1410                 priority = priv->config.flow_prio - 1;
1411         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1412                 int ret;
1413
1414                 switch (actions->type) {
1415                 case RTE_FLOW_ACTION_TYPE_VOID:
1416                         break;
1417                 case RTE_FLOW_ACTION_TYPE_FLAG:
1418                         flow_verbs_translate_action_flag(dev_flow, actions);
1419                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1420                         break;
1421                 case RTE_FLOW_ACTION_TYPE_MARK:
1422                         flow_verbs_translate_action_mark(dev_flow, actions);
1423                         action_flags |= MLX5_FLOW_ACTION_MARK;
1424                         break;
1425                 case RTE_FLOW_ACTION_TYPE_DROP:
1426                         flow_verbs_translate_action_drop(dev_flow, actions);
1427                         action_flags |= MLX5_FLOW_ACTION_DROP;
1428                         break;
1429                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1430                         flow_verbs_translate_action_queue(dev_flow, actions);
1431                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1432                         break;
1433                 case RTE_FLOW_ACTION_TYPE_RSS:
1434                         flow_verbs_translate_action_rss(dev_flow, actions);
1435                         action_flags |= MLX5_FLOW_ACTION_RSS;
1436                         break;
1437                 case RTE_FLOW_ACTION_TYPE_COUNT:
1438                         ret = flow_verbs_translate_action_count(dev_flow,
1439                                                                 actions,
1440                                                                 dev, error);
1441                         if (ret < 0)
1442                                 return ret;
1443                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1444                         break;
1445                 default:
1446                         return rte_flow_error_set(error, ENOTSUP,
1447                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1448                                                   actions,
1449                                                   "action not supported");
1450                 }
1451         }
1452         flow->actions = action_flags;
1453         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1454                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1455
1456                 switch (items->type) {
1457                 case RTE_FLOW_ITEM_TYPE_VOID:
1458                         break;
1459                 case RTE_FLOW_ITEM_TYPE_ETH:
1460                         flow_verbs_translate_item_eth(dev_flow, items,
1461                                                       item_flags);
1462                         subpriority = MLX5_PRIORITY_MAP_L2;
1463                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1464                                                MLX5_FLOW_LAYER_OUTER_L2;
1465                         break;
1466                 case RTE_FLOW_ITEM_TYPE_VLAN:
1467                         flow_verbs_translate_item_vlan(dev_flow, items,
1468                                                        item_flags);
1469                         subpriority = MLX5_PRIORITY_MAP_L2;
1470                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1471                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1472                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1473                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1474                         break;
1475                 case RTE_FLOW_ITEM_TYPE_IPV4:
1476                         flow_verbs_translate_item_ipv4(dev_flow, items,
1477                                                        item_flags);
1478                         subpriority = MLX5_PRIORITY_MAP_L3;
1479                         dev_flow->verbs.hash_fields |=
1480                                 mlx5_flow_hashfields_adjust
1481                                         (dev_flow, tunnel,
1482                                          MLX5_IPV4_LAYER_TYPES,
1483                                          MLX5_IPV4_IBV_RX_HASH);
1484                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1485                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1486                         break;
1487                 case RTE_FLOW_ITEM_TYPE_IPV6:
1488                         flow_verbs_translate_item_ipv6(dev_flow, items,
1489                                                        item_flags);
1490                         subpriority = MLX5_PRIORITY_MAP_L3;
1491                         dev_flow->verbs.hash_fields |=
1492                                 mlx5_flow_hashfields_adjust
1493                                         (dev_flow, tunnel,
1494                                          MLX5_IPV6_LAYER_TYPES,
1495                                          MLX5_IPV6_IBV_RX_HASH);
1496                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1497                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1498                         break;
1499                 case RTE_FLOW_ITEM_TYPE_TCP:
1500                         flow_verbs_translate_item_tcp(dev_flow, items,
1501                                                       item_flags);
1502                         subpriority = MLX5_PRIORITY_MAP_L4;
1503                         dev_flow->verbs.hash_fields |=
1504                                 mlx5_flow_hashfields_adjust
1505                                         (dev_flow, tunnel, ETH_RSS_TCP,
1506                                          (IBV_RX_HASH_SRC_PORT_TCP |
1507                                           IBV_RX_HASH_DST_PORT_TCP));
1508                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1509                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1510                         break;
1511                 case RTE_FLOW_ITEM_TYPE_UDP:
1512                         flow_verbs_translate_item_udp(dev_flow, items,
1513                                                       item_flags);
1514                         subpriority = MLX5_PRIORITY_MAP_L4;
1515                         dev_flow->verbs.hash_fields |=
1516                                 mlx5_flow_hashfields_adjust
1517                                         (dev_flow, tunnel, ETH_RSS_UDP,
1518                                          (IBV_RX_HASH_SRC_PORT_UDP |
1519                                           IBV_RX_HASH_DST_PORT_UDP));
1520                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1521                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1522                         break;
1523                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1524                         flow_verbs_translate_item_vxlan(dev_flow, items,
1525                                                         item_flags);
1526                         subpriority = MLX5_PRIORITY_MAP_L2;
1527                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1528                         break;
1529                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1530                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1531                                                             item_flags);
1532                         subpriority = MLX5_PRIORITY_MAP_L2;
1533                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1534                         break;
1535                 case RTE_FLOW_ITEM_TYPE_GRE:
1536                         flow_verbs_translate_item_gre(dev_flow, items,
1537                                                       item_flags);
1538                         subpriority = MLX5_PRIORITY_MAP_L2;
1539                         item_flags |= MLX5_FLOW_LAYER_GRE;
1540                         break;
1541                 case RTE_FLOW_ITEM_TYPE_MPLS:
1542                         flow_verbs_translate_item_mpls(dev_flow, items,
1543                                                        item_flags);
1544                         subpriority = MLX5_PRIORITY_MAP_L2;
1545                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1546                         break;
1547                 default:
1548                         return rte_flow_error_set(error, ENOTSUP,
1549                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1550                                                   NULL,
1551                                                   "item not supported");
1552                 }
1553         }
1554         dev_flow->layers = item_flags;
1555         dev_flow->verbs.attr->priority =
1556                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1557         dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
1558         return 0;
1559 }
1560
1561 /**
1562  * Remove the flow from the NIC but keeps it in memory.
1563  *
1564  * @param[in] dev
1565  *   Pointer to the Ethernet device structure.
1566  * @param[in, out] flow
1567  *   Pointer to flow structure.
1568  */
1569 static void
1570 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1571 {
1572         struct mlx5_flow_verbs *verbs;
1573         struct mlx5_flow *dev_flow;
1574
1575         if (!flow)
1576                 return;
1577         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1578                 verbs = &dev_flow->verbs;
1579                 if (verbs->flow) {
1580                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1581                         verbs->flow = NULL;
1582                 }
1583                 if (verbs->hrxq) {
1584                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1585                                 mlx5_hrxq_drop_release(dev);
1586                         else
1587                                 mlx5_hrxq_release(dev, verbs->hrxq);
1588                         verbs->hrxq = NULL;
1589                 }
1590         }
1591 }
1592
1593 /**
1594  * Remove the flow from the NIC and the memory.
1595  *
1596  * @param[in] dev
1597  *   Pointer to the Ethernet device structure.
1598  * @param[in, out] flow
1599  *   Pointer to flow structure.
1600  */
1601 static void
1602 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1603 {
1604         struct mlx5_flow *dev_flow;
1605
1606         if (!flow)
1607                 return;
1608         flow_verbs_remove(dev, flow);
1609         while (!LIST_EMPTY(&flow->dev_flows)) {
1610                 dev_flow = LIST_FIRST(&flow->dev_flows);
1611                 LIST_REMOVE(dev_flow, next);
1612                 rte_free(dev_flow);
1613         }
1614         if (flow->counter) {
1615                 flow_verbs_counter_release(flow->counter);
1616                 flow->counter = NULL;
1617         }
1618 }
1619
1620 /**
1621  * Apply the flow to the NIC.
1622  *
1623  * @param[in] dev
1624  *   Pointer to the Ethernet device structure.
1625  * @param[in, out] flow
1626  *   Pointer to flow structure.
1627  * @param[out] error
1628  *   Pointer to error structure.
1629  *
1630  * @return
1631  *   0 on success, a negative errno value otherwise and rte_errno is set.
1632  */
1633 static int
1634 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1635                  struct rte_flow_error *error)
1636 {
1637         struct mlx5_flow_verbs *verbs;
1638         struct mlx5_flow *dev_flow;
1639         int err;
1640
1641         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1642                 verbs = &dev_flow->verbs;
1643                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1644                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1645                         if (!verbs->hrxq) {
1646                                 rte_flow_error_set
1647                                         (error, errno,
1648                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1649                                          "cannot get drop hash queue");
1650                                 goto error;
1651                         }
1652                 } else {
1653                         struct mlx5_hrxq *hrxq;
1654
1655                         hrxq = mlx5_hrxq_get(dev, flow->key,
1656                                              MLX5_RSS_HASH_KEY_LEN,
1657                                              verbs->hash_fields,
1658                                              (*flow->queue),
1659                                              flow->rss.queue_num);
1660                         if (!hrxq)
1661                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1662                                                      MLX5_RSS_HASH_KEY_LEN,
1663                                                      verbs->hash_fields,
1664                                                      (*flow->queue),
1665                                                      flow->rss.queue_num,
1666                                                      !!(dev_flow->layers &
1667                                                       MLX5_FLOW_LAYER_TUNNEL));
1668                         if (!hrxq) {
1669                                 rte_flow_error_set
1670                                         (error, rte_errno,
1671                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1672                                          "cannot get hash queue");
1673                                 goto error;
1674                         }
1675                         verbs->hrxq = hrxq;
1676                 }
1677                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1678                                                      verbs->attr);
1679                 if (!verbs->flow) {
1680                         rte_flow_error_set(error, errno,
1681                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1682                                            NULL,
1683                                            "hardware refuses to create flow");
1684                         goto error;
1685                 }
1686         }
1687         return 0;
1688 error:
1689         err = rte_errno; /* Save rte_errno before cleanup. */
1690         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1691                 verbs = &dev_flow->verbs;
1692                 if (verbs->hrxq) {
1693                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1694                                 mlx5_hrxq_drop_release(dev);
1695                         else
1696                                 mlx5_hrxq_release(dev, verbs->hrxq);
1697                         verbs->hrxq = NULL;
1698                 }
1699         }
1700         rte_errno = err; /* Restore rte_errno. */
1701         return -rte_errno;
1702 }
1703
1704 /**
1705  * Query a flow.
1706  *
1707  * @see rte_flow_query()
1708  * @see rte_flow_ops
1709  */
1710 static int
1711 flow_verbs_query(struct rte_eth_dev *dev,
1712                  struct rte_flow *flow,
1713                  const struct rte_flow_action *actions,
1714                  void *data,
1715                  struct rte_flow_error *error)
1716 {
1717         int ret = -EINVAL;
1718
1719         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1720                 switch (actions->type) {
1721                 case RTE_FLOW_ACTION_TYPE_VOID:
1722                         break;
1723                 case RTE_FLOW_ACTION_TYPE_COUNT:
1724                         ret = flow_verbs_counter_query(dev, flow, data, error);
1725                         break;
1726                 default:
1727                         return rte_flow_error_set(error, ENOTSUP,
1728                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1729                                                   actions,
1730                                                   "action not supported");
1731                 }
1732         }
1733         return ret;
1734 }
1735
1736 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1737         .validate = flow_verbs_validate,
1738         .prepare = flow_verbs_prepare,
1739         .translate = flow_verbs_translate,
1740         .apply = flow_verbs_apply,
1741         .remove = flow_verbs_remove,
1742         .destroy = flow_verbs_destroy,
1743         .query = flow_verbs_query,
1744 };