net/mlx5: switch to the shared IB device context
drivers/net/mlx5/mlx5_flow_verbs.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_flow.h"
35
36 #define VERBS_SPEC_INNER(item_flags) \
37         (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
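/*
 * Once a tunnel item (VXLAN, VXLAN-GPE, GRE, MPLS) has been parsed,
 * item_flags carries MLX5_FLOW_LAYER_TUNNEL and every subsequent spec
 * is built with the IBV_FLOW_SPEC_INNER modifier (e.g.
 * IBV_FLOW_SPEC_ETH | IBV_FLOW_SPEC_INNER), so Verbs matches the inner
 * headers rather than the outer ones.
 */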
38
39 /**
40  * Create Verbs flow counter with Verbs library.
41  *
42  * @param[in] dev
43  *   Pointer to the Ethernet device structure.
44  * @param[in, out] counter
45  *   mlx5 flow counter object; takes the counter ID as input and,
46  *   if counters are supported, returns the handle of the created
47  *   Verbs flow counter in the cs field.
48  *
49  * @return
50  *   0 on success, a negative errno value otherwise and
51  *   rte_errno is set.
52  */
53 static int
54 flow_verbs_counter_create(struct rte_eth_dev *dev,
55                           struct mlx5_flow_counter *counter)
56 {
57 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
58         struct mlx5_priv *priv = dev->data->dev_private;
59         struct ibv_context *ctx = priv->sh->ctx;
60         struct ibv_counter_set_init_attr init = {
61                          .counter_set_id = counter->id};
62
63         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
64         if (!counter->cs) {
65                 rte_errno = ENOTSUP;
66                 return -ENOTSUP;
67         }
68         return 0;
69 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
70         struct mlx5_priv *priv = dev->data->dev_private;
71         struct ibv_context *ctx = priv->sh->ctx;
72         struct ibv_counters_init_attr init = {0};
73         struct ibv_counter_attach_attr attach;
74         int ret;
75
76         memset(&attach, 0, sizeof(attach));
77         counter->cs = mlx5_glue->create_counters(ctx, &init);
78         if (!counter->cs) {
79                 rte_errno = ENOTSUP;
80                 return -ENOTSUP;
81         }
82         attach.counter_desc = IBV_COUNTER_PACKETS;
83         attach.index = 0;
84         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
85         if (!ret) {
86                 attach.counter_desc = IBV_COUNTER_BYTES;
87                 attach.index = 1;
88                 ret = mlx5_glue->attach_counters
89                                         (counter->cs, &attach, NULL);
90         }
91         if (ret) {
92                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
93                 counter->cs = NULL;
94                 rte_errno = ret;
95                 return -ret;
96         }
97         return 0;
98 #else
99         (void)dev;
100         (void)counter;
101         rte_errno = ENOTSUP;
102         return -ENOTSUP;
103 #endif
104 }
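/*
 * rdma-core exposes two generations of the counter API: the older
 * counter-set interface (HAVE_IBV_DEVICE_COUNTERS_SET_V42), where a
 * counter set is created directly from the counter ID, and the newer
 * generic counters interface (HAVE_IBV_DEVICE_COUNTERS_SET_V45), where
 * packet and byte counters are attached explicitly at indices 0 and 1.
 * With neither available, counter creation fails with ENOTSUP.
 */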
105
106 /**
107  * Get a flow counter.
108  *
109  * @param[in] dev
110  *   Pointer to the Ethernet device structure.
111  * @param[in] shared
112  *   Indicate if this counter is shared with other flows.
113  * @param[in] id
114  *   Counter identifier.
115  *
116  * @return
117  *   A pointer to the counter, NULL otherwise and rte_errno is set.
118  */
119 static struct mlx5_flow_counter *
120 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
121 {
122         struct mlx5_priv *priv = dev->data->dev_private;
123         struct mlx5_flow_counter *cnt;
124         int ret;
125
126         if (shared) {
127                 LIST_FOREACH(cnt, &priv->flow_counters, next) {
128                         if (cnt->shared && cnt->id == id) {
129                                 cnt->ref_cnt++;
130                                 return cnt;
131                         }
132                 }
133         }
134         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
135         if (!cnt) {
136                 rte_errno = ENOMEM;
137                 return NULL;
138         }
139         cnt->id = id;
140         cnt->shared = shared;
141         cnt->ref_cnt = 1;
142         cnt->hits = 0;
143         cnt->bytes = 0;
144         /* Create counter with Verbs. */
145         ret = flow_verbs_counter_create(dev, cnt);
146         if (!ret) {
147                 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
148                 return cnt;
149         }
150         /* Some error occurred in Verbs library. */
151         rte_free(cnt);
152         rte_errno = -ret;
153         return NULL;
154 }
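/*
 * Shared counters are simply looked up by ID in priv->flow_counters and
 * reference-counted; flow_verbs_counter_release() below destroys the
 * Verbs object only when the last reference is dropped.
 */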
155
156 /**
157  * Release a flow counter.
158  *
159  * @param[in] counter
160  *   Pointer to the counter handle.
161  */
162 static void
163 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
164 {
165         if (--counter->ref_cnt == 0) {
166 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
167                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
168 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
169                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
170 #endif
171                 LIST_REMOVE(counter, next);
172                 rte_free(counter);
173         }
174 }
175
176 /**
177  * Query a flow counter via Verbs library call.
178  *
179  * @see rte_flow_query()
180  * @see rte_flow_ops
181  */
182 static int
183 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
184                          struct rte_flow *flow, void *data,
185                          struct rte_flow_error *error)
186 {
187 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
188         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
189         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
190                 struct rte_flow_query_count *qc = data;
191                 uint64_t counters[2] = {0, 0};
192 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
193                 struct ibv_query_counter_set_attr query_cs_attr = {
194                         .cs = flow->counter->cs,
195                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
196                 };
197                 struct ibv_counter_set_data query_out = {
198                         .out = counters,
199                         .outlen = 2 * sizeof(uint64_t),
200                 };
201                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
202                                                        &query_out);
203 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
204                 int err = mlx5_glue->query_counters
205                                (flow->counter->cs, counters,
206                                 RTE_DIM(counters),
207                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
208 #endif
209                 if (err)
210                         return rte_flow_error_set
211                                 (error, err,
212                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
213                                  NULL,
214                                  "cannot read counter");
215                 qc->hits_set = 1;
216                 qc->bytes_set = 1;
217                 qc->hits = counters[0] - flow->counter->hits;
218                 qc->bytes = counters[1] - flow->counter->bytes;
219                 if (qc->reset) {
220                         flow->counter->hits = counters[0];
221                         flow->counter->bytes = counters[1];
222                 }
223                 return 0;
224         }
225         return rte_flow_error_set(error, EINVAL,
226                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
227                                   NULL,
228                                   "flow does not have counter");
229 #else
230         (void)flow;
231         (void)data;
232         return rte_flow_error_set(error, ENOTSUP,
233                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
234                                   NULL,
235                                   "counters are not available");
236 #endif
237 }
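/*
 * With the v45 counters interface, counters[0] holds packets and
 * counters[1] holds bytes because of the attach order used in
 * flow_verbs_counter_create(). Hypothetical application-side usage
 * through the generic rte_flow API (illustration only, not part of this
 * driver):
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *   struct rte_flow_error err;
 *   const struct rte_flow_action count_action = {
 *           .type = RTE_FLOW_ACTION_TYPE_COUNT,
 *   };
 *
 *   if (!rte_flow_query(port_id, flow, &count_action, &qc, &err))
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  qc.hits, qc.bytes);
 */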
238
239 /**
240  * Add a verbs item specification into @p verbs.
241  *
242  * @param[out] verbs
243  *   Pointer to verbs structure.
244  * @param[in] src
245  *   Pointer to the specification to copy.
246  * @param[in] size
247  *   Size in bytes of the specification to copy.
248  */
249 static void
250 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
251 {
252         void *dst;
253
254         if (!verbs)
255                 return;
256         assert(verbs->specs);
257         dst = (void *)(verbs->specs + verbs->size);
258         memcpy(dst, src, size);
259         ++verbs->attr->num_of_specs;
260         verbs->size += size;
261 }
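/*
 * Specs are stored as a packed array of variable-size entries placed
 * right after the ibv_flow_attr header (see flow_verbs_prepare()): each
 * call appends the new spec at offset verbs->size, increments
 * num_of_specs and grows verbs->size, which is the layout expected by
 * ibv_create_flow().
 */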
262
263 /**
264  * Convert the @p item into a Verbs specification. This function assumes that
265  * the input is valid and that there is space to insert the requested item
266  * into the flow.
267  *
268  * @param[in, out] dev_flow
269  *   Pointer to dev_flow structure.
270  * @param[in] item
271  *   Item specification.
272  * @param[in] item_flags
273  *   Parsed item flags.
274  */
275 static void
276 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
277                               const struct rte_flow_item *item,
278                               uint64_t item_flags)
279 {
280         const struct rte_flow_item_eth *spec = item->spec;
281         const struct rte_flow_item_eth *mask = item->mask;
282         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
283         struct ibv_flow_spec_eth eth = {
284                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
285                 .size = size,
286         };
287
288         if (!mask)
289                 mask = &rte_flow_item_eth_mask;
290         if (spec) {
291                 unsigned int i;
292
293                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
294                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
295                 eth.val.ether_type = spec->type;
296                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
297                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
298                 eth.mask.ether_type = mask->type;
299                 /* Remove unwanted bits from values. */
300                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
301                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
302                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
303                 }
304                 eth.val.ether_type &= eth.mask.ether_type;
305         }
306         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
307 }
308
309 /**
310  * Update the VLAN tag in the Verbs Ethernet specification.
311  * This function assumes that the input is valid and there is space to add
312  * the requested item.
313  *
314  * @param[in, out] attr
315  *   Pointer to Verbs attributes structure.
316  * @param[in] eth
317  *   Verbs structure containing the VLAN information to copy.
318  */
319 static void
320 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
321                             struct ibv_flow_spec_eth *eth)
322 {
323         unsigned int i;
324         const enum ibv_flow_spec_type search = eth->type;
325         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
326                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
327
328         for (i = 0; i != attr->num_of_specs; ++i) {
329                 if (hdr->type == search) {
330                         struct ibv_flow_spec_eth *e =
331                                 (struct ibv_flow_spec_eth *)hdr;
332
333                         e->val.vlan_tag = eth->val.vlan_tag;
334                         e->mask.vlan_tag = eth->mask.vlan_tag;
335                         e->val.ether_type = eth->val.ether_type;
336                         e->mask.ether_type = eth->mask.ether_type;
337                         break;
338                 }
339                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
340         }
341 }
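/*
 * The spec list is walked generically: every Verbs spec starts with a
 * struct ibv_spec_header holding its type and size, so stepping by
 * hdr->size skips over specs of any kind until the previously inserted
 * Ethernet spec is found and its VLAN tag and ether type are updated in
 * place.
 */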
342
343 /**
344  * Convert the @p item into a Verbs specification. This function assumes that
345  * the input is valid and that there is space to insert the requested item
346  * into the flow.
347  *
348  * @param[in, out] dev_flow
349  *   Pointer to dev_flow structure.
350  * @param[in] item
351  *   Item specification.
352  * @param[in] item_flags
353  *   Parsed item flags.
354  */
355 static void
356 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
357                                const struct rte_flow_item *item,
358                                uint64_t item_flags)
359 {
360         const struct rte_flow_item_vlan *spec = item->spec;
361         const struct rte_flow_item_vlan *mask = item->mask;
362         unsigned int size = sizeof(struct ibv_flow_spec_eth);
363         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
364         struct ibv_flow_spec_eth eth = {
365                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
366                 .size = size,
367         };
368         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
369                                       MLX5_FLOW_LAYER_OUTER_L2;
370
371         if (!mask)
372                 mask = &rte_flow_item_vlan_mask;
373         if (spec) {
374                 eth.val.vlan_tag = spec->tci;
375                 eth.mask.vlan_tag = mask->tci;
376                 eth.val.vlan_tag &= eth.mask.vlan_tag;
377                 eth.val.ether_type = spec->inner_type;
378                 eth.mask.ether_type = mask->inner_type;
379                 eth.val.ether_type &= eth.mask.ether_type;
380         }
381         if (!(item_flags & l2m))
382                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
383         else
384                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
385 }
386
387 /**
388  * Convert the @p item into a Verbs specification. This function assumes that
389  * the input is valid and that there is space to insert the requested item
390  * into the flow.
391  *
392  * @param[in, out] dev_flow
393  *   Pointer to dev_flow structure.
394  * @param[in] item
395  *   Item specification.
396  * @param[in] item_flags
397  *   Parsed item flags.
398  */
399 static void
400 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
401                                const struct rte_flow_item *item,
402                                uint64_t item_flags)
403 {
404         const struct rte_flow_item_ipv4 *spec = item->spec;
405         const struct rte_flow_item_ipv4 *mask = item->mask;
406         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
407         struct ibv_flow_spec_ipv4_ext ipv4 = {
408                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
409                 .size = size,
410         };
411
412         if (!mask)
413                 mask = &rte_flow_item_ipv4_mask;
414         if (spec) {
415                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
416                         .src_ip = spec->hdr.src_addr,
417                         .dst_ip = spec->hdr.dst_addr,
418                         .proto = spec->hdr.next_proto_id,
419                         .tos = spec->hdr.type_of_service,
420                 };
421                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
422                         .src_ip = mask->hdr.src_addr,
423                         .dst_ip = mask->hdr.dst_addr,
424                         .proto = mask->hdr.next_proto_id,
425                         .tos = mask->hdr.type_of_service,
426                 };
427                 /* Remove unwanted bits from values. */
428                 ipv4.val.src_ip &= ipv4.mask.src_ip;
429                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
430                 ipv4.val.proto &= ipv4.mask.proto;
431                 ipv4.val.tos &= ipv4.mask.tos;
432         }
433         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
434 }
435
436 /**
437  * Convert the @p item into a Verbs specification. This function assumes that
438  * the input is valid and that there is space to insert the requested item
439  * into the flow.
440  *
441  * @param[in, out] dev_flow
442  *   Pointer to dev_flow structure.
443  * @param[in] item
444  *   Item specification.
445  * @param[in] item_flags
446  *   Parsed item flags.
447  */
448 static void
449 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
450                                const struct rte_flow_item *item,
451                                uint64_t item_flags)
452 {
453         const struct rte_flow_item_ipv6 *spec = item->spec;
454         const struct rte_flow_item_ipv6 *mask = item->mask;
455         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
456         struct ibv_flow_spec_ipv6 ipv6 = {
457                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
458                 .size = size,
459         };
460
461         if (!mask)
462                 mask = &rte_flow_item_ipv6_mask;
463         if (spec) {
464                 unsigned int i;
465                 uint32_t vtc_flow_val;
466                 uint32_t vtc_flow_mask;
467
468                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
469                        RTE_DIM(ipv6.val.src_ip));
470                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
471                        RTE_DIM(ipv6.val.dst_ip));
472                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
473                        RTE_DIM(ipv6.mask.src_ip));
474                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
475                        RTE_DIM(ipv6.mask.dst_ip));
476                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
477                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
478                 ipv6.val.flow_label =
479                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
480                                          IPV6_HDR_FL_SHIFT);
481                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
482                                          IPV6_HDR_TC_SHIFT;
483                 ipv6.val.next_hdr = spec->hdr.proto;
484                 ipv6.val.hop_limit = spec->hdr.hop_limits;
485                 ipv6.mask.flow_label =
486                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
487                                          IPV6_HDR_FL_SHIFT);
488                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
489                                           IPV6_HDR_TC_SHIFT;
490                 ipv6.mask.next_hdr = mask->hdr.proto;
491                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
492                 /* Remove unwanted bits from values. */
493                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
494                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
495                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
496                 }
497                 ipv6.val.flow_label &= ipv6.mask.flow_label;
498                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
499                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
500                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
501         }
502         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
503 }
504
505 /**
506  * Convert the @p item into a Verbs specification. This function assumes that
507  * the input is valid and that there is space to insert the requested item
508  * into the flow.
509  *
510  * @param[in, out] dev_flow
511  *   Pointer to dev_flow structure.
512  * @param[in] item
513  *   Item specification.
514  * @param[in] item_flags
515  *   Parsed item flags.
516  */
517 static void
518 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
519                               const struct rte_flow_item *item,
520                               uint64_t item_flags __rte_unused)
521 {
522         const struct rte_flow_item_tcp *spec = item->spec;
523         const struct rte_flow_item_tcp *mask = item->mask;
524         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
525         struct ibv_flow_spec_tcp_udp tcp = {
526                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
527                 .size = size,
528         };
529
530         if (!mask)
531                 mask = &rte_flow_item_tcp_mask;
532         if (spec) {
533                 tcp.val.dst_port = spec->hdr.dst_port;
534                 tcp.val.src_port = spec->hdr.src_port;
535                 tcp.mask.dst_port = mask->hdr.dst_port;
536                 tcp.mask.src_port = mask->hdr.src_port;
537                 /* Remove unwanted bits from values. */
538                 tcp.val.src_port &= tcp.mask.src_port;
539                 tcp.val.dst_port &= tcp.mask.dst_port;
540         }
541         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
542 }
543
544 /**
545  * Convert the @p item into a Verbs specification. This function assumes that
546  * the input is valid and that there is space to insert the requested item
547  * into the flow.
548  *
549  * @param[in, out] dev_flow
550  *   Pointer to dev_flow structure.
551  * @param[in] item
552  *   Item specification.
553  * @param[in] item_flags
554  *   Parsed item flags.
555  */
556 static void
557 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
558                               const struct rte_flow_item *item,
559                               uint64_t item_flags __rte_unused)
560 {
561         const struct rte_flow_item_udp *spec = item->spec;
562         const struct rte_flow_item_udp *mask = item->mask;
563         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
564         struct ibv_flow_spec_tcp_udp udp = {
565                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
566                 .size = size,
567         };
568
569         if (!mask)
570                 mask = &rte_flow_item_udp_mask;
571         if (spec) {
572                 udp.val.dst_port = spec->hdr.dst_port;
573                 udp.val.src_port = spec->hdr.src_port;
574                 udp.mask.dst_port = mask->hdr.dst_port;
575                 udp.mask.src_port = mask->hdr.src_port;
576                 /* Remove unwanted bits from values. */
577                 udp.val.src_port &= udp.mask.src_port;
578                 udp.val.dst_port &= udp.mask.dst_port;
579         }
580         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
581 }
582
583 /**
584  * Convert the @p item into a Verbs specification. This function assumes that
585  * the input is valid and that there is space to insert the requested item
586  * into the flow.
587  *
588  * @param[in, out] dev_flow
589  *   Pointer to dev_flow structure.
590  * @param[in] item
591  *   Item specification.
592  * @param[in] item_flags
593  *   Parsed item flags.
594  */
595 static void
596 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
597                                 const struct rte_flow_item *item,
598                                 uint64_t item_flags __rte_unused)
599 {
600         const struct rte_flow_item_vxlan *spec = item->spec;
601         const struct rte_flow_item_vxlan *mask = item->mask;
602         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
603         struct ibv_flow_spec_tunnel vxlan = {
604                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
605                 .size = size,
606         };
607         union vni {
608                 uint32_t vlan_id;
609                 uint8_t vni[4];
610         } id = { .vlan_id = 0, };
611
612         if (!mask)
613                 mask = &rte_flow_item_vxlan_mask;
614         if (spec) {
615                 memcpy(&id.vni[1], spec->vni, 3);
616                 vxlan.val.tunnel_id = id.vlan_id;
617                 memcpy(&id.vni[1], mask->vni, 3);
618                 vxlan.mask.tunnel_id = id.vlan_id;
619                 /* Remove unwanted bits from values. */
620                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
621         }
622         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
623 }
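/*
 * The 24-bit VNI is copied into bytes 1-3 of a zeroed 4-byte union so
 * it can be assigned to the 32-bit tunnel_id field in a single store;
 * the final AND keeps only the bits covered by the item mask.
 */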
624
625 /**
626  * Convert the @p item into a Verbs specification. This function assumes that
627  * the input is valid and that there is space to insert the requested item
628  * into the flow.
629  *
630  * @param[in, out] dev_flow
631  *   Pointer to dev_flow structure.
632  * @param[in] item
633  *   Item specification.
634  * @param[in] item_flags
635  *   Parsed item flags.
636  */
637 static void
638 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
639                                     const struct rte_flow_item *item,
640                                     uint64_t item_flags __rte_unused)
641 {
642         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
643         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
644         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
645         struct ibv_flow_spec_tunnel vxlan_gpe = {
646                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
647                 .size = size,
648         };
649         union vni {
650                 uint32_t vlan_id;
651                 uint8_t vni[4];
652         } id = { .vlan_id = 0, };
653
654         if (!mask)
655                 mask = &rte_flow_item_vxlan_gpe_mask;
656         if (spec) {
657                 memcpy(&id.vni[1], spec->vni, 3);
658                 vxlan_gpe.val.tunnel_id = id.vlan_id;
659                 memcpy(&id.vni[1], mask->vni, 3);
660                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
661                 /* Remove unwanted bits from values. */
662                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
663         }
664         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
665 }
666
667 /**
668  * Update the protocol in Verbs IPv4/IPv6 spec.
669  *
670  * @param[in, out] attr
671  *   Pointer to Verbs attributes structure.
672  * @param[in] search
673  *   Specification type to search in order to update the IP protocol.
674  * @param[in] protocol
675  *   Protocol value to set if none is present in the specification.
676  */
677 static void
678 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
679                                        enum ibv_flow_spec_type search,
680                                        uint8_t protocol)
681 {
682         unsigned int i;
683         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
684                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
685
686         if (!attr)
687                 return;
688         for (i = 0; i != attr->num_of_specs; ++i) {
689                 if (hdr->type == search) {
690                         union {
691                                 struct ibv_flow_spec_ipv4_ext *ipv4;
692                                 struct ibv_flow_spec_ipv6 *ipv6;
693                         } ip;
694
695                         switch (search) {
696                         case IBV_FLOW_SPEC_IPV4_EXT:
697                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
698                                 if (!ip.ipv4->val.proto) {
699                                         ip.ipv4->val.proto = protocol;
700                                         ip.ipv4->mask.proto = 0xff;
701                                 }
702                                 break;
703                         case IBV_FLOW_SPEC_IPV6:
704                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
705                                 if (!ip.ipv6->val.next_hdr) {
706                                         ip.ipv6->val.next_hdr = protocol;
707                                         ip.ipv6->mask.next_hdr = 0xff;
708                                 }
709                                 break;
710                         default:
711                                 break;
712                         }
713                         break;
714                 }
715                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
716         }
717 }
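/*
 * GRE can only follow an IP header carrying protocol 47 (IPPROTO_GRE).
 * When the application did not constrain the IP protocol itself, the
 * helper above patches the already translated IPv4/IPv6 spec so the
 * match is no wider than what the GRE item implies.
 */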
718
719 /**
720  * Convert the @p item into a Verbs specification. This function assumes that
721  * the input is valid and that there is space to insert the requested item
722  * into the flow.
723  *
724  * @param[in, out] dev_flow
725  *   Pointer to dev_flow structure.
726  * @param[in] item
727  *   Item specification.
728  * @param[in] item_flags
729  *   Parsed item flags.
730  */
731 static void
732 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
733                               const struct rte_flow_item *item __rte_unused,
734                               uint64_t item_flags)
735 {
736         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
737 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
738         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
739         struct ibv_flow_spec_tunnel tunnel = {
740                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
741                 .size = size,
742         };
743 #else
744         const struct rte_flow_item_gre *spec = item->spec;
745         const struct rte_flow_item_gre *mask = item->mask;
746         unsigned int size = sizeof(struct ibv_flow_spec_gre);
747         struct ibv_flow_spec_gre tunnel = {
748                 .type = IBV_FLOW_SPEC_GRE,
749                 .size = size,
750         };
751
752         if (!mask)
753                 mask = &rte_flow_item_gre_mask;
754         if (spec) {
755                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
756                 tunnel.val.protocol = spec->protocol;
757                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
758                 tunnel.mask.protocol = mask->protocol;
759                 /* Remove unwanted bits from values. */
760                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
761                 tunnel.val.protocol &= tunnel.mask.protocol;
762                 tunnel.val.key &= tunnel.mask.key;
763         }
764 #endif
765         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
766                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
767                                                        IBV_FLOW_SPEC_IPV4_EXT,
768                                                        IPPROTO_GRE);
769         else
770                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
771                                                        IBV_FLOW_SPEC_IPV6,
772                                                        IPPROTO_GRE);
773         flow_verbs_spec_add(verbs, &tunnel, size);
774 }
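/*
 * Without HAVE_IBV_DEVICE_MPLS_SUPPORT there is no dedicated GRE spec
 * in rdma-core, so the flow falls back to a generic tunnel spec
 * (IBV_FLOW_SPEC_VXLAN_TUNNEL) and the GRE header fields of the item
 * are not matched.
 */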
775
776 /**
777  * Convert the @p item into a Verbs specification. This function assumes that
778  * the input is valid and that there is space to insert the requested item
779  * into the flow.
780  *
781  * @param[in, out] dev_flow
782  *   Pointer to dev_flow structure.
783  * @param[in] item
784  *   Item specification.
785  * @param[in] item_flags
786  *   Parsed item flags.
787  */
788 static void
789 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
790                                const struct rte_flow_item *item __rte_unused,
791                                uint64_t item_flags __rte_unused)
792 {
793 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
794         const struct rte_flow_item_mpls *spec = item->spec;
795         const struct rte_flow_item_mpls *mask = item->mask;
796         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
797         struct ibv_flow_spec_mpls mpls = {
798                 .type = IBV_FLOW_SPEC_MPLS,
799                 .size = size,
800         };
801
802         if (!mask)
803                 mask = &rte_flow_item_mpls_mask;
804         if (spec) {
805                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
806                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
807                 /* Remove unwanted bits from values.  */
808                 mpls.val.label &= mpls.mask.label;
809         }
810         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
811 #endif
812 }
813
814 /**
815  * Convert the @p action into a Verbs specification. This function assumes that
816  * the input is valid and that there is space to insert the requested action
817  * into the flow.
818  *
819  * @param[in] dev_flow
820  *   Pointer to mlx5_flow.
821  * @param[in] action
822  *   Action configuration.
823  */
824 static void
825 flow_verbs_translate_action_drop
826         (struct mlx5_flow *dev_flow,
827          const struct rte_flow_action *action __rte_unused)
828 {
829         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
830         struct ibv_flow_spec_action_drop drop = {
831                         .type = IBV_FLOW_SPEC_ACTION_DROP,
832                         .size = size,
833         };
834
835         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
836 }
837
838 /**
839  * Convert the @p action into a Verbs specification. This function assumes that
840  * the input is valid and that there is space to insert the requested action
841  * into the flow.
842  *
843  * @param[in] dev_flow
844  *   Pointer to mlx5_flow.
845  * @param[in] action
846  *   Action configuration.
847  */
848 static void
849 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
850                                   const struct rte_flow_action *action)
851 {
852         const struct rte_flow_action_queue *queue = action->conf;
853         struct rte_flow *flow = dev_flow->flow;
854
855         if (flow->queue)
856                 (*flow->queue)[0] = queue->index;
857         flow->rss.queue_num = 1;
858 }
859
860 /**
861  * Convert the @p action into a Verbs specification. This function assumes that
862  * the input is valid and that there is space to insert the requested action
863  * into the flow.
864  *
865  * @param[in] dev_flow
866  *   Pointer to mlx5_flow.
867  * @param[in] action
868  *   Action configuration.
871  */
872 static void
873 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
874                                 const struct rte_flow_action *action)
875 {
876         const struct rte_flow_action_rss *rss = action->conf;
877         const uint8_t *rss_key;
878         struct rte_flow *flow = dev_flow->flow;
879
880         if (flow->queue)
881                 memcpy((*flow->queue), rss->queue,
882                        rss->queue_num * sizeof(uint16_t));
883         flow->rss.queue_num = rss->queue_num;
884         /* NULL RSS key indicates default RSS key. */
885         rss_key = !rss->key ? rss_hash_default_key : rss->key;
886         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
887         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
888         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
889         flow->rss.level = rss->level;
890 }
891
892 /**
893  * Convert the @p action into a Verbs specification. This function assumes that
894  * the input is valid and that there is space to insert the requested action
895  * into the flow.
896  *
897  * @param[in] dev_flow
898  *   Pointer to mlx5_flow.
899  * @param[in] action
900  *   Action configuration.
901  */
902 static void
903 flow_verbs_translate_action_flag
904         (struct mlx5_flow *dev_flow,
905          const struct rte_flow_action *action __rte_unused)
906 {
907         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
908         struct ibv_flow_spec_action_tag tag = {
909                 .type = IBV_FLOW_SPEC_ACTION_TAG,
910                 .size = size,
911                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
912         };
913
914         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
915 }
916
917 /**
918  * Convert the @p action into a Verbs specification. This function assumes that
919  * the input is valid and that there is space to insert the requested action
920  * into the flow.
921  *
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[in] action
925  *   Action configuration.
926  */
927 static void
928 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
929                                  const struct rte_flow_action *action)
930 {
931         const struct rte_flow_action_mark *mark = action->conf;
932         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
933         struct ibv_flow_spec_action_tag tag = {
934                 .type = IBV_FLOW_SPEC_ACTION_TAG,
935                 .size = size,
936                 .tag_id = mlx5_flow_mark_set(mark->id),
937         };
938
939         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
940 }
941
942 /**
943  * Convert the @p action into a Verbs specification. This function assumes that
944  * the input is valid and that there is space to insert the requested action
945  * into the flow.
946  *
947  * @param[in] dev_flow
948  *   Pointer to mlx5_flow.
949  * @param[in] action
950  *   Action configuration.
951  * @param[in] dev
952  *   Pointer to the Ethernet device structure.
953  * @param[out] error
954  *   Pointer to error structure.
955  *
956  * @return
957  *   0 on success, a negative errno value otherwise and rte_errno is set.
958  */
959 static int
960 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
961                                   const struct rte_flow_action *action,
962                                   struct rte_eth_dev *dev,
963                                   struct rte_flow_error *error)
964 {
965         const struct rte_flow_action_count *count = action->conf;
966         struct rte_flow *flow = dev_flow->flow;
967 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
968         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
969         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
970         struct ibv_flow_spec_counter_action counter = {
971                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
972                 .size = size,
973         };
974 #endif
975
976         if (!flow->counter) {
977                 flow->counter = flow_verbs_counter_new(dev, count->shared,
978                                                        count->id);
979                 if (!flow->counter)
980                         return rte_flow_error_set(error, rte_errno,
981                                                   RTE_FLOW_ERROR_TYPE_ACTION,
982                                                   action,
983                                                   "cannot get counter"
984                                                   " context.");
985         }
986 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
987         counter.counter_set_handle = flow->counter->cs->handle;
988         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
989 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
990         counter.counters = flow->counter->cs;
991         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
992 #endif
993         return 0;
994 }
995
996 /**
997  * Internal validation function for validating both actions and items.
998  *
999  * @param[in] dev
1000  *   Pointer to the Ethernet device structure.
1001  * @param[in] attr
1002  *   Pointer to the flow attributes.
1003  * @param[in] items
1004  *   Pointer to the list of items.
1005  * @param[in] actions
1006  *   Pointer to the list of actions.
1007  * @param[out] error
1008  *   Pointer to the error structure.
1009  *
1010  * @return
1011  *   0 on success, a negative errno value otherwise and rte_errno is set.
1012  */
1013 static int
1014 flow_verbs_validate(struct rte_eth_dev *dev,
1015                     const struct rte_flow_attr *attr,
1016                     const struct rte_flow_item items[],
1017                     const struct rte_flow_action actions[],
1018                     struct rte_flow_error *error)
1019 {
1020         int ret;
1021         uint64_t action_flags = 0;
1022         uint64_t item_flags = 0;
1023         uint64_t last_item = 0;
1024         uint8_t next_protocol = 0xff;
1025
1026         if (items == NULL)
1027                 return -1;
1028         ret = mlx5_flow_validate_attributes(dev, attr, error);
1029         if (ret < 0)
1030                 return ret;
1031         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1032                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1033                 int ret = 0;
1034
1035                 switch (items->type) {
1036                 case RTE_FLOW_ITEM_TYPE_VOID:
1037                         break;
1038                 case RTE_FLOW_ITEM_TYPE_ETH:
1039                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1040                                                           error);
1041                         if (ret < 0)
1042                                 return ret;
1043                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1044                                              MLX5_FLOW_LAYER_OUTER_L2;
1045                         break;
1046                 case RTE_FLOW_ITEM_TYPE_VLAN:
1047                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1048                                                            error);
1049                         if (ret < 0)
1050                                 return ret;
1051                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1052                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1053                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1054                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1055                         break;
1056                 case RTE_FLOW_ITEM_TYPE_IPV4:
1057                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1058                                                            NULL, error);
1059                         if (ret < 0)
1060                                 return ret;
1061                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1062                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1063                         if (items->mask != NULL &&
1064                             ((const struct rte_flow_item_ipv4 *)
1065                              items->mask)->hdr.next_proto_id) {
1066                                 next_protocol =
1067                                         ((const struct rte_flow_item_ipv4 *)
1068                                          (items->spec))->hdr.next_proto_id;
1069                                 next_protocol &=
1070                                         ((const struct rte_flow_item_ipv4 *)
1071                                          (items->mask))->hdr.next_proto_id;
1072                         } else {
1073                                 /* Reset for inner layer. */
1074                                 next_protocol = 0xff;
1075                         }
1076                         break;
1077                 case RTE_FLOW_ITEM_TYPE_IPV6:
1078                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1079                                                            NULL, error);
1080                         if (ret < 0)
1081                                 return ret;
1082                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1083                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1084                         if (items->mask != NULL &&
1085                             ((const struct rte_flow_item_ipv6 *)
1086                              items->mask)->hdr.proto) {
1087                                 next_protocol =
1088                                         ((const struct rte_flow_item_ipv6 *)
1089                                          items->spec)->hdr.proto;
1090                                 next_protocol &=
1091                                         ((const struct rte_flow_item_ipv6 *)
1092                                          items->mask)->hdr.proto;
1093                         } else {
1094                                 /* Reset for inner layer. */
1095                                 next_protocol = 0xff;
1096                         }
1097                         break;
1098                 case RTE_FLOW_ITEM_TYPE_UDP:
1099                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1100                                                           next_protocol,
1101                                                           error);
1102                         if (ret < 0)
1103                                 return ret;
1104                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1105                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1106                         break;
1107                 case RTE_FLOW_ITEM_TYPE_TCP:
1108                         ret = mlx5_flow_validate_item_tcp
1109                                                 (items, item_flags,
1110                                                  next_protocol,
1111                                                  &rte_flow_item_tcp_mask,
1112                                                  error);
1113                         if (ret < 0)
1114                                 return ret;
1115                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1116                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1117                         break;
1118                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1119                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1120                                                             error);
1121                         if (ret < 0)
1122                                 return ret;
1123                         last_item = MLX5_FLOW_LAYER_VXLAN;
1124                         break;
1125                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1126                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1127                                                                 item_flags,
1128                                                                 dev, error);
1129                         if (ret < 0)
1130                                 return ret;
1131                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1132                         break;
1133                 case RTE_FLOW_ITEM_TYPE_GRE:
1134                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1135                                                           next_protocol, error);
1136                         if (ret < 0)
1137                                 return ret;
1138                         last_item = MLX5_FLOW_LAYER_GRE;
1139                         break;
1140                 case RTE_FLOW_ITEM_TYPE_MPLS:
1141                         ret = mlx5_flow_validate_item_mpls(dev, items,
1142                                                            item_flags,
1143                                                            last_item, error);
1144                         if (ret < 0)
1145                                 return ret;
1146                         last_item = MLX5_FLOW_LAYER_MPLS;
1147                         break;
1148                 default:
1149                         return rte_flow_error_set(error, ENOTSUP,
1150                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1151                                                   NULL, "item not supported");
1152                 }
1153                 item_flags |= last_item;
1154         }
1155         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1156                 switch (actions->type) {
1157                 case RTE_FLOW_ACTION_TYPE_VOID:
1158                         break;
1159                 case RTE_FLOW_ACTION_TYPE_FLAG:
1160                         ret = mlx5_flow_validate_action_flag(action_flags,
1161                                                              attr,
1162                                                              error);
1163                         if (ret < 0)
1164                                 return ret;
1165                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1166                         break;
1167                 case RTE_FLOW_ACTION_TYPE_MARK:
1168                         ret = mlx5_flow_validate_action_mark(actions,
1169                                                              action_flags,
1170                                                              attr,
1171                                                              error);
1172                         if (ret < 0)
1173                                 return ret;
1174                         action_flags |= MLX5_FLOW_ACTION_MARK;
1175                         break;
1176                 case RTE_FLOW_ACTION_TYPE_DROP:
1177                         ret = mlx5_flow_validate_action_drop(action_flags,
1178                                                              attr,
1179                                                              error);
1180                         if (ret < 0)
1181                                 return ret;
1182                         action_flags |= MLX5_FLOW_ACTION_DROP;
1183                         break;
1184                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1185                         ret = mlx5_flow_validate_action_queue(actions,
1186                                                               action_flags, dev,
1187                                                               attr,
1188                                                               error);
1189                         if (ret < 0)
1190                                 return ret;
1191                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1192                         break;
1193                 case RTE_FLOW_ACTION_TYPE_RSS:
1194                         ret = mlx5_flow_validate_action_rss(actions,
1195                                                             action_flags, dev,
1196                                                             attr,
1197                                                             error);
1198                         if (ret < 0)
1199                                 return ret;
1200                         action_flags |= MLX5_FLOW_ACTION_RSS;
1201                         break;
1202                 case RTE_FLOW_ACTION_TYPE_COUNT:
1203                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1204                         if (ret < 0)
1205                                 return ret;
1206                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1207                         break;
1208                 default:
1209                         return rte_flow_error_set(error, ENOTSUP,
1210                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1211                                                   actions,
1212                                                   "action not supported");
1213                 }
1214         }
1215         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1216                 return rte_flow_error_set(error, EINVAL,
1217                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1218                                           "no fate action is found");
1219         return 0;
1220 }
1221
1222 /**
1223  * Calculate the size in bytes needed for the action part of the Verbs
1224  * flow.
1225  *
1226  * @param[in] actions
1227  *   Pointer to the list of actions.
1228  *
1229  * @return
1230  *   The size of the memory needed for all actions.
1231  */
1232 static int
1233 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1234 {
1235         int size = 0;
1236
1237         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1238                 switch (actions->type) {
1239                 case RTE_FLOW_ACTION_TYPE_VOID:
1240                         break;
1241                 case RTE_FLOW_ACTION_TYPE_FLAG:
1242                         size += sizeof(struct ibv_flow_spec_action_tag);
1243                         break;
1244                 case RTE_FLOW_ACTION_TYPE_MARK:
1245                         size += sizeof(struct ibv_flow_spec_action_tag);
1246                         break;
1247                 case RTE_FLOW_ACTION_TYPE_DROP:
1248                         size += sizeof(struct ibv_flow_spec_action_drop);
1249                         break;
1250                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1251                         break;
1252                 case RTE_FLOW_ACTION_TYPE_RSS:
1253                         break;
1254                 case RTE_FLOW_ACTION_TYPE_COUNT:
1255 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1256         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1257                         size += sizeof(struct ibv_flow_spec_counter_action);
1258 #endif
1259                         break;
1260                 default:
1261                         break;
1262                 }
1263         }
1264         return size;
1265 }
1266
1267 /**
1268  * Calculate the size in bytes needed for the item part of the Verbs
1269  * flow.
1270  *
1271  * @param[in] items
1272  *   Pointer to the list of items.
1273  *
1274  * @return
1275  *   The size of the memory needed for all items.
1276  */
1277 static int
1278 flow_verbs_get_items_size(const struct rte_flow_item items[])
1279 {
1280         int size = 0;
1281
1282         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1283                 switch (items->type) {
1284                 case RTE_FLOW_ITEM_TYPE_VOID:
1285                         break;
1286                 case RTE_FLOW_ITEM_TYPE_ETH:
1287                         size += sizeof(struct ibv_flow_spec_eth);
1288                         break;
1289                 case RTE_FLOW_ITEM_TYPE_VLAN:
1290                         size += sizeof(struct ibv_flow_spec_eth);
1291                         break;
1292                 case RTE_FLOW_ITEM_TYPE_IPV4:
1293                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1294                         break;
1295                 case RTE_FLOW_ITEM_TYPE_IPV6:
1296                         size += sizeof(struct ibv_flow_spec_ipv6);
1297                         break;
1298                 case RTE_FLOW_ITEM_TYPE_UDP:
1299                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1300                         break;
1301                 case RTE_FLOW_ITEM_TYPE_TCP:
1302                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1303                         break;
1304                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1305                         size += sizeof(struct ibv_flow_spec_tunnel);
1306                         break;
1307                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1308                         size += sizeof(struct ibv_flow_spec_tunnel);
1309                         break;
1310 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1311                 case RTE_FLOW_ITEM_TYPE_GRE:
1312                         size += sizeof(struct ibv_flow_spec_gre);
1313                         break;
1314                 case RTE_FLOW_ITEM_TYPE_MPLS:
1315                         size += sizeof(struct ibv_flow_spec_mpls);
1316                         break;
1317 #else
1318                 case RTE_FLOW_ITEM_TYPE_GRE:
1319                         size += sizeof(struct ibv_flow_spec_tunnel);
1320                         break;
1321 #endif
1322                 default:
1323                         break;
1324                 }
1325         }
1326         return size;
1327 }
1328
1329 /**
1330  * Internal preparation function. Allocate mlx5_flow with the required size.
1331  * The required size is calculated based on the lists of items and
1332  * actions.
1333  *
1334  * @param[in] attr
1335  *   Pointer to the flow attributes.
1336  * @param[in] items
1337  *   Pointer to the list of items.
1338  * @param[in] actions
1339  *   Pointer to the list of actions.
1340  * @param[out] error
1341  *   Pointer to the error structure.
1342  *
1343  * @return
1344  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1345  *   is set.
1346  */
1347 static struct mlx5_flow *
1348 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1349                    const struct rte_flow_item items[],
1350                    const struct rte_flow_action actions[],
1351                    struct rte_flow_error *error)
1352 {
1353         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1354         struct mlx5_flow *flow;
1355
1356         size += flow_verbs_get_actions_size(actions);
1357         size += flow_verbs_get_items_size(items);
1358         flow = rte_calloc(__func__, 1, size, 0);
1359         if (!flow) {
1360                 rte_flow_error_set(error, ENOMEM,
1361                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1362                                    "not enough memory to create flow");
1363                 return NULL;
1364         }
1365         flow->verbs.attr = (void *)(flow + 1);
1366         flow->verbs.specs =
1367                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1368         return flow;
1369 }
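/*
 * Layout of the single allocation made above, as an illustrative sketch:
 *
 *   flow->verbs.attr  == (uint8_t *)flow + sizeof(struct mlx5_flow);
 *   flow->verbs.specs == (uint8_t *)flow + sizeof(struct mlx5_flow) +
 *                        sizeof(struct ibv_flow_attr);
 *
 * so the translation step can append item and action specs contiguously
 * after the attribute without any further allocation.
 */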
1370
1371 /**
1372  * Fill the flow with Verbs specs.
1373  *
1374  * @param[in] dev
1375  *   Pointer to Ethernet device.
1376  * @param[in, out] dev_flow
1377  *   Pointer to the mlx5 flow.
1378  * @param[in] attr
1379  *   Pointer to the flow attributes.
1380  * @param[in] items
1381  *   Pointer to the list of items.
1382  * @param[in] actions
1383  *   Pointer to the list of actions.
1384  * @param[out] error
1385  *   Pointer to the error structure.
1386  *
1387  * @return
1388  *   0 on success, a negative errno value otherwise and rte_errno is set.
1389  */
1390 static int
1391 flow_verbs_translate(struct rte_eth_dev *dev,
1392                      struct mlx5_flow *dev_flow,
1393                      const struct rte_flow_attr *attr,
1394                      const struct rte_flow_item items[],
1395                      const struct rte_flow_action actions[],
1396                      struct rte_flow_error *error)
1397 {
1398         struct rte_flow *flow = dev_flow->flow;
1399         uint64_t item_flags = 0;
1400         uint64_t action_flags = 0;
1401         uint64_t priority = attr->priority;
1402         uint32_t subpriority = 0;
1403         struct mlx5_priv *priv = dev->data->dev_private;
1404
1405         if (priority == MLX5_FLOW_PRIO_RSVD)
1406                 priority = priv->config.flow_prio - 1;
1407         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1408                 int ret;
1409
1410                 switch (actions->type) {
1411                 case RTE_FLOW_ACTION_TYPE_VOID:
1412                         break;
1413                 case RTE_FLOW_ACTION_TYPE_FLAG:
1414                         flow_verbs_translate_action_flag(dev_flow, actions);
1415                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1416                         break;
1417                 case RTE_FLOW_ACTION_TYPE_MARK:
1418                         flow_verbs_translate_action_mark(dev_flow, actions);
1419                         action_flags |= MLX5_FLOW_ACTION_MARK;
1420                         break;
1421                 case RTE_FLOW_ACTION_TYPE_DROP:
1422                         flow_verbs_translate_action_drop(dev_flow, actions);
1423                         action_flags |= MLX5_FLOW_ACTION_DROP;
1424                         break;
1425                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1426                         flow_verbs_translate_action_queue(dev_flow, actions);
1427                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1428                         break;
1429                 case RTE_FLOW_ACTION_TYPE_RSS:
1430                         flow_verbs_translate_action_rss(dev_flow, actions);
1431                         action_flags |= MLX5_FLOW_ACTION_RSS;
1432                         break;
1433                 case RTE_FLOW_ACTION_TYPE_COUNT:
1434                         ret = flow_verbs_translate_action_count(dev_flow,
1435                                                                 actions,
1436                                                                 dev, error);
1437                         if (ret < 0)
1438                                 return ret;
1439                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1440                         break;
1441                 default:
1442                         return rte_flow_error_set(error, ENOTSUP,
1443                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1444                                                   actions,
1445                                                   "action not supported");
1446                 }
1447         }
1448         flow->actions = action_flags;
1449         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1450                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1451
1452                 switch (items->type) {
1453                 case RTE_FLOW_ITEM_TYPE_VOID:
1454                         break;
1455                 case RTE_FLOW_ITEM_TYPE_ETH:
1456                         flow_verbs_translate_item_eth(dev_flow, items,
1457                                                       item_flags);
1458                         subpriority = MLX5_PRIORITY_MAP_L2;
1459                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1460                                                MLX5_FLOW_LAYER_OUTER_L2;
1461                         break;
1462                 case RTE_FLOW_ITEM_TYPE_VLAN:
1463                         flow_verbs_translate_item_vlan(dev_flow, items,
1464                                                        item_flags);
1465                         subpriority = MLX5_PRIORITY_MAP_L2;
1466                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1467                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1468                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1469                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1470                         break;
1471                 case RTE_FLOW_ITEM_TYPE_IPV4:
1472                         flow_verbs_translate_item_ipv4(dev_flow, items,
1473                                                        item_flags);
1474                         subpriority = MLX5_PRIORITY_MAP_L3;
1475                         dev_flow->verbs.hash_fields |=
1476                                 mlx5_flow_hashfields_adjust
1477                                         (dev_flow, tunnel,
1478                                          MLX5_IPV4_LAYER_TYPES,
1479                                          MLX5_IPV4_IBV_RX_HASH);
1480                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1481                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1482                         break;
1483                 case RTE_FLOW_ITEM_TYPE_IPV6:
1484                         flow_verbs_translate_item_ipv6(dev_flow, items,
1485                                                        item_flags);
1486                         subpriority = MLX5_PRIORITY_MAP_L3;
1487                         dev_flow->verbs.hash_fields |=
1488                                 mlx5_flow_hashfields_adjust
1489                                         (dev_flow, tunnel,
1490                                          MLX5_IPV6_LAYER_TYPES,
1491                                          MLX5_IPV6_IBV_RX_HASH);
1492                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1493                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1494                         break;
1495                 case RTE_FLOW_ITEM_TYPE_TCP:
1496                         flow_verbs_translate_item_tcp(dev_flow, items,
1497                                                       item_flags);
1498                         subpriority = MLX5_PRIORITY_MAP_L4;
1499                         dev_flow->verbs.hash_fields |=
1500                                 mlx5_flow_hashfields_adjust
1501                                         (dev_flow, tunnel, ETH_RSS_TCP,
1502                                          (IBV_RX_HASH_SRC_PORT_TCP |
1503                                           IBV_RX_HASH_DST_PORT_TCP));
1504                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1505                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1506                         break;
1507                 case RTE_FLOW_ITEM_TYPE_UDP:
1508                         flow_verbs_translate_item_udp(dev_flow, items,
1509                                                       item_flags);
1510                         subpriority = MLX5_PRIORITY_MAP_L4;
1511                         dev_flow->verbs.hash_fields |=
1512                                 mlx5_flow_hashfields_adjust
1513                                         (dev_flow, tunnel, ETH_RSS_UDP,
1514                                          (IBV_RX_HASH_SRC_PORT_UDP |
1515                                           IBV_RX_HASH_DST_PORT_UDP));
1516                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1517                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1518                         break;
1519                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1520                         flow_verbs_translate_item_vxlan(dev_flow, items,
1521                                                         item_flags);
1522                         subpriority = MLX5_PRIORITY_MAP_L2;
1523                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1524                         break;
1525                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1526                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1527                                                             item_flags);
1528                         subpriority = MLX5_PRIORITY_MAP_L2;
1529                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1530                         break;
1531                 case RTE_FLOW_ITEM_TYPE_GRE:
1532                         flow_verbs_translate_item_gre(dev_flow, items,
1533                                                       item_flags);
1534                         subpriority = MLX5_PRIORITY_MAP_L2;
1535                         item_flags |= MLX5_FLOW_LAYER_GRE;
1536                         break;
1537                 case RTE_FLOW_ITEM_TYPE_MPLS:
1538                         flow_verbs_translate_item_mpls(dev_flow, items,
1539                                                        item_flags);
1540                         subpriority = MLX5_PRIORITY_MAP_L2;
1541                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1542                         break;
1543                 default:
1544                         return rte_flow_error_set(error, ENOTSUP,
1545                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1546                                                   NULL,
1547                                                   "item not supported");
1548                 }
1549         }
1550         dev_flow->layers = item_flags;
1551         dev_flow->verbs.attr->priority =
1552                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1553         return 0;
1554 }
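/*
 * Illustrative walk-through (assumed rule, not taken from the driver): for
 *   pattern eth / ipv4 / udp / end  with  actions rss / end
 * the loops above leave
 *
 *   flow->actions    == MLX5_FLOW_ACTION_RSS;
 *   dev_flow->layers == MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                       MLX5_FLOW_LAYER_OUTER_L4_UDP;
 *   subpriority      == MLX5_PRIORITY_MAP_L4;
 *
 * and the Verbs attribute priority is then derived from these by
 * mlx5_flow_adjust_priority().
 */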
1555
1556 /**
1557  * Remove the flow from the NIC but keep it in memory.
1558  *
1559  * @param[in] dev
1560  *   Pointer to the Ethernet device structure.
1561  * @param[in, out] flow
1562  *   Pointer to flow structure.
1563  */
1564 static void
1565 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1566 {
1567         struct mlx5_flow_verbs *verbs;
1568         struct mlx5_flow *dev_flow;
1569
1570         if (!flow)
1571                 return;
1572         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1573                 verbs = &dev_flow->verbs;
1574                 if (verbs->flow) {
1575                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1576                         verbs->flow = NULL;
1577                 }
1578                 if (verbs->hrxq) {
1579                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1580                                 mlx5_hrxq_drop_release(dev);
1581                         else
1582                                 mlx5_hrxq_release(dev, verbs->hrxq);
1583                         verbs->hrxq = NULL;
1584                 }
1585         }
1586 }
1587
1588 /**
1589  * Remove the flow from the NIC and the memory.
1590  *
1591  * @param[in] dev
1592  *   Pointer to the Ethernet device structure.
1593  * @param[in, out] flow
1594  *   Pointer to flow structure.
1595  */
1596 static void
1597 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1598 {
1599         struct mlx5_flow *dev_flow;
1600
1601         if (!flow)
1602                 return;
1603         flow_verbs_remove(dev, flow);
1604         while (!LIST_EMPTY(&flow->dev_flows)) {
1605                 dev_flow = LIST_FIRST(&flow->dev_flows);
1606                 LIST_REMOVE(dev_flow, next);
1607                 rte_free(dev_flow);
1608         }
1609         if (flow->counter) {
1610                 flow_verbs_counter_release(flow->counter);
1611                 flow->counter = NULL;
1612         }
1613 }
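/*
 * Lifecycle sketch (illustrative): remove() only detaches the rule from the
 * NIC and keeps the mlx5_flow objects, so the same rule can be re-applied
 * later (e.g. across a device stop/start cycle), while destroy() also frees
 * every device flow and releases the attached counter:
 *
 *   flow_verbs_remove(dev, flow);         hardware rule gone, memory kept
 *   flow_verbs_apply(dev, flow, &error);  rule re-created from the kept specs
 *   flow_verbs_destroy(dev, flow);        everything released
 */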
1614
1615 /**
1616  * Apply the flow to the NIC.
1617  *
1618  * @param[in] dev
1619  *   Pointer to the Ethernet device structure.
1620  * @param[in, out] flow
1621  *   Pointer to flow structure.
1622  * @param[out] error
1623  *   Pointer to error structure.
1624  *
1625  * @return
1626  *   0 on success, a negative errno value otherwise and rte_errno is set.
1627  */
1628 static int
1629 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1630                  struct rte_flow_error *error)
1631 {
1632         struct mlx5_flow_verbs *verbs;
1633         struct mlx5_flow *dev_flow;
1634         int err;
1635
1636         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1637                 verbs = &dev_flow->verbs;
1638                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1639                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1640                         if (!verbs->hrxq) {
1641                                 rte_flow_error_set
1642                                         (error, errno,
1643                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1644                                          "cannot get drop hash queue");
1645                                 goto error;
1646                         }
1647                 } else {
1648                         struct mlx5_hrxq *hrxq;
1649
1650                         hrxq = mlx5_hrxq_get(dev, flow->key,
1651                                              MLX5_RSS_HASH_KEY_LEN,
1652                                              verbs->hash_fields,
1653                                              (*flow->queue),
1654                                              flow->rss.queue_num);
1655                         if (!hrxq)
1656                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1657                                                      MLX5_RSS_HASH_KEY_LEN,
1658                                                      verbs->hash_fields,
1659                                                      (*flow->queue),
1660                                                      flow->rss.queue_num,
1661                                                      !!(dev_flow->layers &
1662                                                       MLX5_FLOW_LAYER_TUNNEL));
1663                         if (!hrxq) {
1664                                 rte_flow_error_set
1665                                         (error, rte_errno,
1666                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1667                                          "cannot get hash queue");
1668                                 goto error;
1669                         }
1670                         verbs->hrxq = hrxq;
1671                 }
1672                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1673                                                      verbs->attr);
1674                 if (!verbs->flow) {
1675                         rte_flow_error_set(error, errno,
1676                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1677                                            NULL,
1678                                            "hardware refuses to create flow");
1679                         goto error;
1680                 }
1681         }
1682         return 0;
1683 error:
1684         err = rte_errno; /* Save rte_errno before cleanup. */
1685         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1686                 verbs = &dev_flow->verbs;
1687                 if (verbs->hrxq) {
1688                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1689                                 mlx5_hrxq_drop_release(dev);
1690                         else
1691                                 mlx5_hrxq_release(dev, verbs->hrxq);
1692                         verbs->hrxq = NULL;
1693                 }
1694         }
1695         rte_errno = err; /* Restore rte_errno. */
1696         return -rte_errno;
1697 }
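/*
 * Hash Rx queue resolution used above, shown as a standalone sketch
 * (illustrative only, argument names abbreviated): queue/RSS flows first try
 * to reuse an existing hrxq and create a new one only on a miss,
 *
 *   hrxq = mlx5_hrxq_get(dev, key, MLX5_RSS_HASH_KEY_LEN, hash_fields,
 *                        queues, n_queues);
 *   if (!hrxq)
 *           hrxq = mlx5_hrxq_new(dev, key, MLX5_RSS_HASH_KEY_LEN,
 *                                hash_fields, queues, n_queues, tunnel);
 *
 * while drop flows take the shared drop hrxq from mlx5_hrxq_drop_new(), and
 * the error path releases whatever was acquired before restoring rte_errno.
 */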
1698
1699 /**
1700  * Query a flow.
1701  *
1702  * @see rte_flow_query()
1703  * @see rte_flow_ops
1704  */
1705 static int
1706 flow_verbs_query(struct rte_eth_dev *dev,
1707                  struct rte_flow *flow,
1708                  const struct rte_flow_action *actions,
1709                  void *data,
1710                  struct rte_flow_error *error)
1711 {
1712         int ret = -EINVAL;
1713
1714         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1715                 switch (actions->type) {
1716                 case RTE_FLOW_ACTION_TYPE_VOID:
1717                         break;
1718                 case RTE_FLOW_ACTION_TYPE_COUNT:
1719                         ret = flow_verbs_counter_query(dev, flow, data, error);
1720                         break;
1721                 default:
1722                         return rte_flow_error_set(error, ENOTSUP,
1723                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1724                                                   actions,
1725                                                   "action not supported");
1726                 }
1727         }
1728         return ret;
1729 }
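/*
 * Caller-side sketch (hypothetical application code, not part of the
 * driver): the only queryable action here is COUNT, and the data pointer is
 * expected to reference a struct rte_flow_query_count,
 *
 *   struct rte_flow_query_count count = { .reset = 0 };
 *   const struct rte_flow_action query[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 *   if (!rte_flow_query(port_id, flow, query, &count, &error))
 *           use count.hits and count.bytes;
 */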
1730
1731 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1732         .validate = flow_verbs_validate,
1733         .prepare = flow_verbs_prepare,
1734         .translate = flow_verbs_translate,
1735         .apply = flow_verbs_apply,
1736         .remove = flow_verbs_remove,
1737         .destroy = flow_verbs_destroy,
1738         .query = flow_verbs_query,
1739 };
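/*
 * Dispatch sketch (illustrative, simplified): the generic layer in
 * mlx5_flow.c selects this ops table for the Verbs path and drives a rule
 * through it roughly as
 *
 *   fops->validate(dev, attr, items, actions, error);
 *   dev_flow = fops->prepare(attr, items, actions, error);
 *   fops->translate(dev, dev_flow, attr, items, actions, error);
 *   fops->apply(dev, flow, error);
 *
 * with remove()/destroy() and query() invoked through the same table later
 * in the rule's life.
 */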