net/mlx5: fix RSS validation function
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_flow.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_prm.h"
35 #include "mlx5_rxtx.h"
36
/*
 * Verbs specification type modifier: expands to IBV_FLOW_SPEC_INNER when any
 * MLX5_FLOW_LAYER_TUNNEL bit is set in item_flags, to 0 otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	(((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
39
40 /**
41  * Create Verbs flow counter with Verbs library.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in, out] counter
46  *   mlx5 flow counter object, contains the counter id,
47  *   handle of created Verbs flow counter is returned
48  *   in cs field (if counters are supported).
49  *
50  * @return
51  *   0 On success else a negative errno value is returned
52  *   and rte_errno is set.
53  */
54 static int
55 flow_verbs_counter_create(struct rte_eth_dev *dev,
56                           struct mlx5_flow_counter *counter)
57 {
58 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
59         struct mlx5_priv *priv = dev->data->dev_private;
60         struct ibv_context *ctx = priv->sh->ctx;
61         struct ibv_counter_set_init_attr init = {
62                          .counter_set_id = counter->id};
63
64         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
65         if (!counter->cs) {
66                 rte_errno = ENOTSUP;
67                 return -ENOTSUP;
68         }
69         return 0;
70 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
71         struct mlx5_priv *priv = dev->data->dev_private;
72         struct ibv_context *ctx = priv->sh->ctx;
73         struct ibv_counters_init_attr init = {0};
74         struct ibv_counter_attach_attr attach;
75         int ret;
76
77         memset(&attach, 0, sizeof(attach));
78         counter->cs = mlx5_glue->create_counters(ctx, &init);
79         if (!counter->cs) {
80                 rte_errno = ENOTSUP;
81                 return -ENOTSUP;
82         }
83         attach.counter_desc = IBV_COUNTER_PACKETS;
84         attach.index = 0;
85         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
86         if (!ret) {
87                 attach.counter_desc = IBV_COUNTER_BYTES;
88                 attach.index = 1;
89                 ret = mlx5_glue->attach_counters
90                                         (counter->cs, &attach, NULL);
91         }
92         if (ret) {
93                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
94                 counter->cs = NULL;
95                 rte_errno = ret;
96                 return -ret;
97         }
98         return 0;
99 #else
100         (void)dev;
101         (void)counter;
102         rte_errno = ENOTSUP;
103         return -ENOTSUP;
104 #endif
105 }
106
107 /**
108  * Get a flow counter.
109  *
110  * @param[in] dev
111  *   Pointer to the Ethernet device structure.
112  * @param[in] shared
113  *   Indicate if this counter is shared with other flows.
114  * @param[in] id
115  *   Counter identifier.
116  *
117  * @return
118  *   A pointer to the counter, NULL otherwise and rte_errno is set.
119  */
120 static struct mlx5_flow_counter *
121 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
122 {
123         struct mlx5_priv *priv = dev->data->dev_private;
124         struct mlx5_flow_counter *cnt;
125         int ret;
126
127         if (shared) {
128                 LIST_FOREACH(cnt, &priv->flow_counters, next) {
129                         if (cnt->shared && cnt->id == id) {
130                                 cnt->ref_cnt++;
131                                 return cnt;
132                         }
133                 }
134         }
135         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
136         if (!cnt) {
137                 rte_errno = ENOMEM;
138                 return NULL;
139         }
140         cnt->id = id;
141         cnt->shared = shared;
142         cnt->ref_cnt = 1;
143         cnt->hits = 0;
144         cnt->bytes = 0;
145         /* Create counter with Verbs. */
146         ret = flow_verbs_counter_create(dev, cnt);
147         if (!ret) {
148                 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
149                 return cnt;
150         }
151         /* Some error occurred in Verbs library. */
152         rte_free(cnt);
153         rte_errno = -ret;
154         return NULL;
155 }
156
157 /**
158  * Release a flow counter.
159  *
160  * @param[in] counter
161  *   Pointer to the counter handler.
162  */
163 static void
164 flow_verbs_counter_release(struct mlx5_flow_counter *counter)
165 {
166         if (--counter->ref_cnt == 0) {
167 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
168                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
169 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
170                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
171 #endif
172                 LIST_REMOVE(counter, next);
173                 rte_free(counter);
174         }
175 }
176
177 /**
178  * Query a flow counter via Verbs library call.
179  *
180  * @see rte_flow_query()
181  * @see rte_flow_ops
182  */
183 static int
184 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
185                          struct rte_flow *flow, void *data,
186                          struct rte_flow_error *error)
187 {
188 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
189         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
190         if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
191                 struct rte_flow_query_count *qc = data;
192                 uint64_t counters[2] = {0, 0};
193 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
194                 struct ibv_query_counter_set_attr query_cs_attr = {
195                         .cs = flow->counter->cs,
196                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
197                 };
198                 struct ibv_counter_set_data query_out = {
199                         .out = counters,
200                         .outlen = 2 * sizeof(uint64_t),
201                 };
202                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
203                                                        &query_out);
204 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
205                 int err = mlx5_glue->query_counters
206                                (flow->counter->cs, counters,
207                                 RTE_DIM(counters),
208                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
209 #endif
210                 if (err)
211                         return rte_flow_error_set
212                                 (error, err,
213                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
214                                  NULL,
215                                  "cannot read counter");
216                 qc->hits_set = 1;
217                 qc->bytes_set = 1;
218                 qc->hits = counters[0] - flow->counter->hits;
219                 qc->bytes = counters[1] - flow->counter->bytes;
220                 if (qc->reset) {
221                         flow->counter->hits = counters[0];
222                         flow->counter->bytes = counters[1];
223                 }
224                 return 0;
225         }
226         return rte_flow_error_set(error, EINVAL,
227                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
228                                   NULL,
229                                   "flow does not have counter");
230 #else
231         (void)flow;
232         (void)data;
233         return rte_flow_error_set(error, ENOTSUP,
234                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
235                                   NULL,
236                                   "counters are not available");
237 #endif
238 }
239
240 /**
241  * Add a verbs item specification into @p verbs.
242  *
243  * @param[out] verbs
244  *   Pointer to verbs structure.
245  * @param[in] src
246  *   Create specification.
247  * @param[in] size
248  *   Size in bytes of the specification to copy.
249  */
250 static void
251 flow_verbs_spec_add(struct mlx5_flow_verbs *verbs, void *src, unsigned int size)
252 {
253         void *dst;
254
255         if (!verbs)
256                 return;
257         assert(verbs->specs);
258         dst = (void *)(verbs->specs + verbs->size);
259         memcpy(dst, src, size);
260         ++verbs->attr->num_of_specs;
261         verbs->size += size;
262 }
263
264 /**
265  * Convert the @p item into a Verbs specification. This function assumes that
266  * the input is valid and that there is space to insert the requested item
267  * into the flow.
268  *
269  * @param[in, out] dev_flow
270  *   Pointer to dev_flow structure.
271  * @param[in] item
272  *   Item specification.
273  * @param[in] item_flags
274  *   Parsed item flags.
275  */
276 static void
277 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
278                               const struct rte_flow_item *item,
279                               uint64_t item_flags)
280 {
281         const struct rte_flow_item_eth *spec = item->spec;
282         const struct rte_flow_item_eth *mask = item->mask;
283         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
284         struct ibv_flow_spec_eth eth = {
285                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
286                 .size = size,
287         };
288
289         if (!mask)
290                 mask = &rte_flow_item_eth_mask;
291         if (spec) {
292                 unsigned int i;
293
294                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
295                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
296                 eth.val.ether_type = spec->type;
297                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
298                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
299                 eth.mask.ether_type = mask->type;
300                 /* Remove unwanted bits from values. */
301                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
302                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
303                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
304                 }
305                 eth.val.ether_type &= eth.mask.ether_type;
306         }
307         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
308 }
309
310 /**
311  * Update the VLAN tag in the Verbs Ethernet specification.
312  * This function assumes that the input is valid and there is space to add
313  * the requested item.
314  *
315  * @param[in, out] attr
316  *   Pointer to Verbs attributes structure.
317  * @param[in] eth
318  *   Verbs structure containing the VLAN information to copy.
319  */
320 static void
321 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
322                             struct ibv_flow_spec_eth *eth)
323 {
324         unsigned int i;
325         const enum ibv_flow_spec_type search = eth->type;
326         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
327                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
328
329         for (i = 0; i != attr->num_of_specs; ++i) {
330                 if (hdr->type == search) {
331                         struct ibv_flow_spec_eth *e =
332                                 (struct ibv_flow_spec_eth *)hdr;
333
334                         e->val.vlan_tag = eth->val.vlan_tag;
335                         e->mask.vlan_tag = eth->mask.vlan_tag;
336                         e->val.ether_type = eth->val.ether_type;
337                         e->mask.ether_type = eth->mask.ether_type;
338                         break;
339                 }
340                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
341         }
342 }
343
344 /**
345  * Convert the @p item into a Verbs specification. This function assumes that
346  * the input is valid and that there is space to insert the requested item
347  * into the flow.
348  *
349  * @param[in, out] dev_flow
350  *   Pointer to dev_flow structure.
351  * @param[in] item
352  *   Item specification.
353  * @param[in] item_flags
354  *   Parsed item flags.
355  */
356 static void
357 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
358                                const struct rte_flow_item *item,
359                                uint64_t item_flags)
360 {
361         const struct rte_flow_item_vlan *spec = item->spec;
362         const struct rte_flow_item_vlan *mask = item->mask;
363         unsigned int size = sizeof(struct ibv_flow_spec_eth);
364         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
365         struct ibv_flow_spec_eth eth = {
366                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
367                 .size = size,
368         };
369         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
370                                       MLX5_FLOW_LAYER_OUTER_L2;
371
372         if (!mask)
373                 mask = &rte_flow_item_vlan_mask;
374         if (spec) {
375                 eth.val.vlan_tag = spec->tci;
376                 eth.mask.vlan_tag = mask->tci;
377                 eth.val.vlan_tag &= eth.mask.vlan_tag;
378                 eth.val.ether_type = spec->inner_type;
379                 eth.mask.ether_type = mask->inner_type;
380                 eth.val.ether_type &= eth.mask.ether_type;
381         }
382         if (!(item_flags & l2m))
383                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
384         else
385                 flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
386 }
387
388 /**
389  * Convert the @p item into a Verbs specification. This function assumes that
390  * the input is valid and that there is space to insert the requested item
391  * into the flow.
392  *
393  * @param[in, out] dev_flow
394  *   Pointer to dev_flow structure.
395  * @param[in] item
396  *   Item specification.
397  * @param[in] item_flags
398  *   Parsed item flags.
399  */
400 static void
401 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
402                                const struct rte_flow_item *item,
403                                uint64_t item_flags)
404 {
405         const struct rte_flow_item_ipv4 *spec = item->spec;
406         const struct rte_flow_item_ipv4 *mask = item->mask;
407         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
408         struct ibv_flow_spec_ipv4_ext ipv4 = {
409                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
410                 .size = size,
411         };
412
413         if (!mask)
414                 mask = &rte_flow_item_ipv4_mask;
415         if (spec) {
416                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
417                         .src_ip = spec->hdr.src_addr,
418                         .dst_ip = spec->hdr.dst_addr,
419                         .proto = spec->hdr.next_proto_id,
420                         .tos = spec->hdr.type_of_service,
421                 };
422                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
423                         .src_ip = mask->hdr.src_addr,
424                         .dst_ip = mask->hdr.dst_addr,
425                         .proto = mask->hdr.next_proto_id,
426                         .tos = mask->hdr.type_of_service,
427                 };
428                 /* Remove unwanted bits from values. */
429                 ipv4.val.src_ip &= ipv4.mask.src_ip;
430                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
431                 ipv4.val.proto &= ipv4.mask.proto;
432                 ipv4.val.tos &= ipv4.mask.tos;
433         }
434         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
435 }
436
437 /**
438  * Convert the @p item into a Verbs specification. This function assumes that
439  * the input is valid and that there is space to insert the requested item
440  * into the flow.
441  *
442  * @param[in, out] dev_flow
443  *   Pointer to dev_flow structure.
444  * @param[in] item
445  *   Item specification.
446  * @param[in] item_flags
447  *   Parsed item flags.
448  */
449 static void
450 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
451                                const struct rte_flow_item *item,
452                                uint64_t item_flags)
453 {
454         const struct rte_flow_item_ipv6 *spec = item->spec;
455         const struct rte_flow_item_ipv6 *mask = item->mask;
456         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
457         struct ibv_flow_spec_ipv6 ipv6 = {
458                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
459                 .size = size,
460         };
461
462         if (!mask)
463                 mask = &rte_flow_item_ipv6_mask;
464         if (spec) {
465                 unsigned int i;
466                 uint32_t vtc_flow_val;
467                 uint32_t vtc_flow_mask;
468
469                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
470                        RTE_DIM(ipv6.val.src_ip));
471                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
472                        RTE_DIM(ipv6.val.dst_ip));
473                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
474                        RTE_DIM(ipv6.mask.src_ip));
475                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
476                        RTE_DIM(ipv6.mask.dst_ip));
477                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
478                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
479                 ipv6.val.flow_label =
480                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
481                                          IPV6_HDR_FL_SHIFT);
482                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
483                                          IPV6_HDR_TC_SHIFT;
484                 ipv6.val.next_hdr = spec->hdr.proto;
485                 ipv6.val.hop_limit = spec->hdr.hop_limits;
486                 ipv6.mask.flow_label =
487                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
488                                          IPV6_HDR_FL_SHIFT);
489                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
490                                           IPV6_HDR_TC_SHIFT;
491                 ipv6.mask.next_hdr = mask->hdr.proto;
492                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
493                 /* Remove unwanted bits from values. */
494                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
495                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
496                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
497                 }
498                 ipv6.val.flow_label &= ipv6.mask.flow_label;
499                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
500                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
501                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
502         }
503         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
504 }
505
506 /**
507  * Convert the @p item into a Verbs specification. This function assumes that
508  * the input is valid and that there is space to insert the requested item
509  * into the flow.
510  *
511  * @param[in, out] dev_flow
512  *   Pointer to dev_flow structure.
513  * @param[in] item
514  *   Item specification.
515  * @param[in] item_flags
516  *   Parsed item flags.
517  */
518 static void
519 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
520                               const struct rte_flow_item *item,
521                               uint64_t item_flags __rte_unused)
522 {
523         const struct rte_flow_item_tcp *spec = item->spec;
524         const struct rte_flow_item_tcp *mask = item->mask;
525         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
526         struct ibv_flow_spec_tcp_udp tcp = {
527                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
528                 .size = size,
529         };
530
531         if (!mask)
532                 mask = &rte_flow_item_tcp_mask;
533         if (spec) {
534                 tcp.val.dst_port = spec->hdr.dst_port;
535                 tcp.val.src_port = spec->hdr.src_port;
536                 tcp.mask.dst_port = mask->hdr.dst_port;
537                 tcp.mask.src_port = mask->hdr.src_port;
538                 /* Remove unwanted bits from values. */
539                 tcp.val.src_port &= tcp.mask.src_port;
540                 tcp.val.dst_port &= tcp.mask.dst_port;
541         }
542         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
543 }
544
545 /**
546  * Convert the @p item into a Verbs specification. This function assumes that
547  * the input is valid and that there is space to insert the requested item
548  * into the flow.
549  *
550  * @param[in, out] dev_flow
551  *   Pointer to dev_flow structure.
552  * @param[in] item
553  *   Item specification.
554  * @param[in] item_flags
555  *   Parsed item flags.
556  */
557 static void
558 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
559                               const struct rte_flow_item *item,
560                               uint64_t item_flags __rte_unused)
561 {
562         const struct rte_flow_item_udp *spec = item->spec;
563         const struct rte_flow_item_udp *mask = item->mask;
564         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
565         struct ibv_flow_spec_tcp_udp udp = {
566                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
567                 .size = size,
568         };
569
570         if (!mask)
571                 mask = &rte_flow_item_udp_mask;
572         if (spec) {
573                 udp.val.dst_port = spec->hdr.dst_port;
574                 udp.val.src_port = spec->hdr.src_port;
575                 udp.mask.dst_port = mask->hdr.dst_port;
576                 udp.mask.src_port = mask->hdr.src_port;
577                 /* Remove unwanted bits from values. */
578                 udp.val.src_port &= udp.mask.src_port;
579                 udp.val.dst_port &= udp.mask.dst_port;
580         }
581         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
582 }
583
584 /**
585  * Convert the @p item into a Verbs specification. This function assumes that
586  * the input is valid and that there is space to insert the requested item
587  * into the flow.
588  *
589  * @param[in, out] dev_flow
590  *   Pointer to dev_flow structure.
591  * @param[in] item
592  *   Item specification.
593  * @param[in] item_flags
594  *   Parsed item flags.
595  */
596 static void
597 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
598                                 const struct rte_flow_item *item,
599                                 uint64_t item_flags __rte_unused)
600 {
601         const struct rte_flow_item_vxlan *spec = item->spec;
602         const struct rte_flow_item_vxlan *mask = item->mask;
603         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
604         struct ibv_flow_spec_tunnel vxlan = {
605                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
606                 .size = size,
607         };
608         union vni {
609                 uint32_t vlan_id;
610                 uint8_t vni[4];
611         } id = { .vlan_id = 0, };
612
613         if (!mask)
614                 mask = &rte_flow_item_vxlan_mask;
615         if (spec) {
616                 memcpy(&id.vni[1], spec->vni, 3);
617                 vxlan.val.tunnel_id = id.vlan_id;
618                 memcpy(&id.vni[1], mask->vni, 3);
619                 vxlan.mask.tunnel_id = id.vlan_id;
620                 /* Remove unwanted bits from values. */
621                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
622         }
623         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
624 }
625
626 /**
627  * Convert the @p item into a Verbs specification. This function assumes that
628  * the input is valid and that there is space to insert the requested item
629  * into the flow.
630  *
631  * @param[in, out] dev_flow
632  *   Pointer to dev_flow structure.
633  * @param[in] item
634  *   Item specification.
635  * @param[in] item_flags
636  *   Parsed item flags.
637  */
638 static void
639 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
640                                     const struct rte_flow_item *item,
641                                     uint64_t item_flags __rte_unused)
642 {
643         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
644         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
645         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
646         struct ibv_flow_spec_tunnel vxlan_gpe = {
647                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
648                 .size = size,
649         };
650         union vni {
651                 uint32_t vlan_id;
652                 uint8_t vni[4];
653         } id = { .vlan_id = 0, };
654
655         if (!mask)
656                 mask = &rte_flow_item_vxlan_gpe_mask;
657         if (spec) {
658                 memcpy(&id.vni[1], spec->vni, 3);
659                 vxlan_gpe.val.tunnel_id = id.vlan_id;
660                 memcpy(&id.vni[1], mask->vni, 3);
661                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
662                 /* Remove unwanted bits from values. */
663                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
664         }
665         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
666 }
667
668 /**
669  * Update the protocol in Verbs IPv4/IPv6 spec.
670  *
671  * @param[in, out] attr
672  *   Pointer to Verbs attributes structure.
673  * @param[in] search
674  *   Specification type to search in order to update the IP protocol.
675  * @param[in] protocol
676  *   Protocol value to set if none is present in the specification.
677  */
678 static void
679 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
680                                        enum ibv_flow_spec_type search,
681                                        uint8_t protocol)
682 {
683         unsigned int i;
684         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
685                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
686
687         if (!attr)
688                 return;
689         for (i = 0; i != attr->num_of_specs; ++i) {
690                 if (hdr->type == search) {
691                         union {
692                                 struct ibv_flow_spec_ipv4_ext *ipv4;
693                                 struct ibv_flow_spec_ipv6 *ipv6;
694                         } ip;
695
696                         switch (search) {
697                         case IBV_FLOW_SPEC_IPV4_EXT:
698                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
699                                 if (!ip.ipv4->val.proto) {
700                                         ip.ipv4->val.proto = protocol;
701                                         ip.ipv4->mask.proto = 0xff;
702                                 }
703                                 break;
704                         case IBV_FLOW_SPEC_IPV6:
705                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
706                                 if (!ip.ipv6->val.next_hdr) {
707                                         ip.ipv6->val.next_hdr = protocol;
708                                         ip.ipv6->mask.next_hdr = 0xff;
709                                 }
710                                 break;
711                         default:
712                                 break;
713                         }
714                         break;
715                 }
716                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
717         }
718 }
719
720 /**
721  * Convert the @p item into a Verbs specification. This function assumes that
722  * the input is valid and that there is space to insert the requested item
723  * into the flow.
724  *
725  * @param[in, out] dev_flow
726  *   Pointer to dev_flow structure.
727  * @param[in] item
728  *   Item specification.
729  * @param[in] item_flags
730  *   Parsed item flags.
731  */
732 static void
733 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
734                               const struct rte_flow_item *item __rte_unused,
735                               uint64_t item_flags)
736 {
737         struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
738 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
739         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
740         struct ibv_flow_spec_tunnel tunnel = {
741                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
742                 .size = size,
743         };
744 #else
745         const struct rte_flow_item_gre *spec = item->spec;
746         const struct rte_flow_item_gre *mask = item->mask;
747         unsigned int size = sizeof(struct ibv_flow_spec_gre);
748         struct ibv_flow_spec_gre tunnel = {
749                 .type = IBV_FLOW_SPEC_GRE,
750                 .size = size,
751         };
752
753         if (!mask)
754                 mask = &rte_flow_item_gre_mask;
755         if (spec) {
756                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
757                 tunnel.val.protocol = spec->protocol;
758                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
759                 tunnel.mask.protocol = mask->protocol;
760                 /* Remove unwanted bits from values. */
761                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
762                 tunnel.val.protocol &= tunnel.mask.protocol;
763                 tunnel.val.key &= tunnel.mask.key;
764         }
765 #endif
766         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
767                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
768                                                        IBV_FLOW_SPEC_IPV4_EXT,
769                                                        IPPROTO_GRE);
770         else
771                 flow_verbs_item_gre_ip_protocol_update(verbs->attr,
772                                                        IBV_FLOW_SPEC_IPV6,
773                                                        IPPROTO_GRE);
774         flow_verbs_spec_add(verbs, &tunnel, size);
775 }
776
777 /**
778  * Convert the @p action into a Verbs specification. This function assumes that
779  * the input is valid and that there is space to insert the requested action
780  * into the flow. This function also return the action that was added.
781  *
782  * @param[in, out] dev_flow
783  *   Pointer to dev_flow structure.
784  * @param[in] item
785  *   Item specification.
786  * @param[in] item_flags
787  *   Parsed item flags.
788  */
789 static void
790 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
791                                const struct rte_flow_item *item __rte_unused,
792                                uint64_t item_flags __rte_unused)
793 {
794 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
795         const struct rte_flow_item_mpls *spec = item->spec;
796         const struct rte_flow_item_mpls *mask = item->mask;
797         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
798         struct ibv_flow_spec_mpls mpls = {
799                 .type = IBV_FLOW_SPEC_MPLS,
800                 .size = size,
801         };
802
803         if (!mask)
804                 mask = &rte_flow_item_mpls_mask;
805         if (spec) {
806                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
807                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
808                 /* Remove unwanted bits from values.  */
809                 mpls.val.label &= mpls.mask.label;
810         }
811         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
812 #endif
813 }
814
815 /**
816  * Convert the @p action into a Verbs specification. This function assumes that
817  * the input is valid and that there is space to insert the requested action
818  * into the flow.
819  *
820  * @param[in] dev_flow
821  *   Pointer to mlx5_flow.
822  * @param[in] action
823  *   Action configuration.
824  */
825 static void
826 flow_verbs_translate_action_drop
827         (struct mlx5_flow *dev_flow,
828          const struct rte_flow_action *action __rte_unused)
829 {
830         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
831         struct ibv_flow_spec_action_drop drop = {
832                         .type = IBV_FLOW_SPEC_ACTION_DROP,
833                         .size = size,
834         };
835
836         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
837 }
838
839 /**
840  * Convert the @p action into a Verbs specification. This function assumes that
841  * the input is valid and that there is space to insert the requested action
842  * into the flow.
843  *
844  * @param[in] dev_flow
845  *   Pointer to mlx5_flow.
846  * @param[in] action
847  *   Action configuration.
848  */
849 static void
850 flow_verbs_translate_action_queue(struct mlx5_flow *dev_flow,
851                                   const struct rte_flow_action *action)
852 {
853         const struct rte_flow_action_queue *queue = action->conf;
854         struct rte_flow *flow = dev_flow->flow;
855
856         if (flow->queue)
857                 (*flow->queue)[0] = queue->index;
858         flow->rss.queue_num = 1;
859 }
860
861 /**
862  * Convert the @p action into a Verbs specification. This function assumes that
863  * the input is valid and that there is space to insert the requested action
864  * into the flow.
865  *
866  * @param[in] action
867  *   Action configuration.
868  * @param[in, out] action_flags
869  *   Pointer to the detected actions.
870  * @param[in] dev_flow
871  *   Pointer to mlx5_flow.
872  */
873 static void
874 flow_verbs_translate_action_rss(struct mlx5_flow *dev_flow,
875                                 const struct rte_flow_action *action)
876 {
877         const struct rte_flow_action_rss *rss = action->conf;
878         const uint8_t *rss_key;
879         struct rte_flow *flow = dev_flow->flow;
880
881         if (flow->queue)
882                 memcpy((*flow->queue), rss->queue,
883                        rss->queue_num * sizeof(uint16_t));
884         flow->rss.queue_num = rss->queue_num;
885         /* NULL RSS key indicates default RSS key. */
886         rss_key = !rss->key ? rss_hash_default_key : rss->key;
887         memcpy(flow->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
888         /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
889         flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
890         flow->rss.level = rss->level;
891 }
892
893 /**
894  * Convert the @p action into a Verbs specification. This function assumes that
895  * the input is valid and that there is space to insert the requested action
896  * into the flow.
897  *
898  * @param[in] dev_flow
899  *   Pointer to mlx5_flow.
900  * @param[in] action
901  *   Action configuration.
902  */
903 static void
904 flow_verbs_translate_action_flag
905         (struct mlx5_flow *dev_flow,
906          const struct rte_flow_action *action __rte_unused)
907 {
908         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
909         struct ibv_flow_spec_action_tag tag = {
910                 .type = IBV_FLOW_SPEC_ACTION_TAG,
911                 .size = size,
912                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
913         };
914
915         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
916 }
917
918 /**
919  * Convert the @p action into a Verbs specification. This function assumes that
920  * the input is valid and that there is space to insert the requested action
921  * into the flow.
922  *
923  * @param[in] dev_flow
924  *   Pointer to mlx5_flow.
925  * @param[in] action
926  *   Action configuration.
927  */
928 static void
929 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
930                                  const struct rte_flow_action *action)
931 {
932         const struct rte_flow_action_mark *mark = action->conf;
933         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
934         struct ibv_flow_spec_action_tag tag = {
935                 .type = IBV_FLOW_SPEC_ACTION_TAG,
936                 .size = size,
937                 .tag_id = mlx5_flow_mark_set(mark->id),
938         };
939
940         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
941 }
942
943 /**
944  * Convert the @p action into a Verbs specification. This function assumes that
945  * the input is valid and that there is space to insert the requested action
946  * into the flow.
947  *
948  * @param[in] dev
949  *   Pointer to the Ethernet device structure.
950  * @param[in] action
951  *   Action configuration.
952  * @param[in] dev_flow
953  *   Pointer to mlx5_flow.
954  * @param[out] error
955  *   Pointer to error structure.
956  *
957  * @return
958  *   0 On success else a negative errno value is returned and rte_errno is set.
959  */
960 static int
961 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
962                                   const struct rte_flow_action *action,
963                                   struct rte_eth_dev *dev,
964                                   struct rte_flow_error *error)
965 {
966         const struct rte_flow_action_count *count = action->conf;
967         struct rte_flow *flow = dev_flow->flow;
968 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
969         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
970         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
971         struct ibv_flow_spec_counter_action counter = {
972                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
973                 .size = size,
974         };
975 #endif
976
977         if (!flow->counter) {
978                 flow->counter = flow_verbs_counter_new(dev, count->shared,
979                                                        count->id);
980                 if (!flow->counter)
981                         return rte_flow_error_set(error, rte_errno,
982                                                   RTE_FLOW_ERROR_TYPE_ACTION,
983                                                   action,
984                                                   "cannot get counter"
985                                                   " context.");
986         }
987 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
988         counter.counter_set_handle = flow->counter->cs->handle;
989         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
990 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
991         counter.counters = flow->counter->cs;
992         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
993 #endif
994         return 0;
995 }
996
997 /**
998  * Internal validation function. For validating both actions and items.
999  *
1000  * @param[in] dev
1001  *   Pointer to the Ethernet device structure.
1002  * @param[in] attr
1003  *   Pointer to the flow attributes.
1004  * @param[in] items
1005  *   Pointer to the list of items.
1006  * @param[in] actions
1007  *   Pointer to the list of actions.
1008  * @param[out] error
1009  *   Pointer to the error structure.
1010  *
1011  * @return
1012  *   0 on success, a negative errno value otherwise and rte_errno is set.
1013  */
1014 static int
1015 flow_verbs_validate(struct rte_eth_dev *dev,
1016                     const struct rte_flow_attr *attr,
1017                     const struct rte_flow_item items[],
1018                     const struct rte_flow_action actions[],
1019                     struct rte_flow_error *error)
1020 {
1021         int ret;
1022         uint64_t action_flags = 0;
1023         uint64_t item_flags = 0;
1024         uint64_t last_item = 0;
1025         uint8_t next_protocol = 0xff;
1026
1027         if (items == NULL)
1028                 return -1;
1029         ret = mlx5_flow_validate_attributes(dev, attr, error);
1030         if (ret < 0)
1031                 return ret;
1032         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1033                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1034                 int ret = 0;
1035
1036                 switch (items->type) {
1037                 case RTE_FLOW_ITEM_TYPE_VOID:
1038                         break;
1039                 case RTE_FLOW_ITEM_TYPE_ETH:
1040                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1041                                                           error);
1042                         if (ret < 0)
1043                                 return ret;
1044                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1045                                              MLX5_FLOW_LAYER_OUTER_L2;
1046                         break;
1047                 case RTE_FLOW_ITEM_TYPE_VLAN:
1048                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1049                                                            error);
1050                         if (ret < 0)
1051                                 return ret;
1052                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1053                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1054                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1055                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1056                         break;
1057                 case RTE_FLOW_ITEM_TYPE_IPV4:
1058                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1059                                                            NULL, error);
1060                         if (ret < 0)
1061                                 return ret;
1062                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1063                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1064                         if (items->mask != NULL &&
1065                             ((const struct rte_flow_item_ipv4 *)
1066                              items->mask)->hdr.next_proto_id) {
1067                                 next_protocol =
1068                                         ((const struct rte_flow_item_ipv4 *)
1069                                          (items->spec))->hdr.next_proto_id;
1070                                 next_protocol &=
1071                                         ((const struct rte_flow_item_ipv4 *)
1072                                          (items->mask))->hdr.next_proto_id;
1073                         } else {
1074                                 /* Reset for inner layer. */
1075                                 next_protocol = 0xff;
1076                         }
1077                         break;
1078                 case RTE_FLOW_ITEM_TYPE_IPV6:
1079                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1080                                                            NULL, error);
1081                         if (ret < 0)
1082                                 return ret;
1083                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1084                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1085                         if (items->mask != NULL &&
1086                             ((const struct rte_flow_item_ipv6 *)
1087                              items->mask)->hdr.proto) {
1088                                 next_protocol =
1089                                         ((const struct rte_flow_item_ipv6 *)
1090                                          items->spec)->hdr.proto;
1091                                 next_protocol &=
1092                                         ((const struct rte_flow_item_ipv6 *)
1093                                          items->mask)->hdr.proto;
1094                         } else {
1095                                 /* Reset for inner layer. */
1096                                 next_protocol = 0xff;
1097                         }
1098                         break;
1099                 case RTE_FLOW_ITEM_TYPE_UDP:
1100                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1101                                                           next_protocol,
1102                                                           error);
1103                         if (ret < 0)
1104                                 return ret;
1105                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1106                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1107                         break;
1108                 case RTE_FLOW_ITEM_TYPE_TCP:
1109                         ret = mlx5_flow_validate_item_tcp
1110                                                 (items, item_flags,
1111                                                  next_protocol,
1112                                                  &rte_flow_item_tcp_mask,
1113                                                  error);
1114                         if (ret < 0)
1115                                 return ret;
1116                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1117                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1118                         break;
1119                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1120                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1121                                                             error);
1122                         if (ret < 0)
1123                                 return ret;
1124                         last_item = MLX5_FLOW_LAYER_VXLAN;
1125                         break;
1126                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1127                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1128                                                                 item_flags,
1129                                                                 dev, error);
1130                         if (ret < 0)
1131                                 return ret;
1132                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1133                         break;
1134                 case RTE_FLOW_ITEM_TYPE_GRE:
1135                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1136                                                           next_protocol, error);
1137                         if (ret < 0)
1138                                 return ret;
1139                         last_item = MLX5_FLOW_LAYER_GRE;
1140                         break;
1141                 case RTE_FLOW_ITEM_TYPE_MPLS:
1142                         ret = mlx5_flow_validate_item_mpls(dev, items,
1143                                                            item_flags,
1144                                                            last_item, error);
1145                         if (ret < 0)
1146                                 return ret;
1147                         last_item = MLX5_FLOW_LAYER_MPLS;
1148                         break;
1149                 default:
1150                         return rte_flow_error_set(error, ENOTSUP,
1151                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1152                                                   NULL, "item not supported");
1153                 }
1154                 item_flags |= last_item;
1155         }
1156         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1157                 switch (actions->type) {
1158                 case RTE_FLOW_ACTION_TYPE_VOID:
1159                         break;
1160                 case RTE_FLOW_ACTION_TYPE_FLAG:
1161                         ret = mlx5_flow_validate_action_flag(action_flags,
1162                                                              attr,
1163                                                              error);
1164                         if (ret < 0)
1165                                 return ret;
1166                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1167                         break;
1168                 case RTE_FLOW_ACTION_TYPE_MARK:
1169                         ret = mlx5_flow_validate_action_mark(actions,
1170                                                              action_flags,
1171                                                              attr,
1172                                                              error);
1173                         if (ret < 0)
1174                                 return ret;
1175                         action_flags |= MLX5_FLOW_ACTION_MARK;
1176                         break;
1177                 case RTE_FLOW_ACTION_TYPE_DROP:
1178                         ret = mlx5_flow_validate_action_drop(action_flags,
1179                                                              attr,
1180                                                              error);
1181                         if (ret < 0)
1182                                 return ret;
1183                         action_flags |= MLX5_FLOW_ACTION_DROP;
1184                         break;
1185                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1186                         ret = mlx5_flow_validate_action_queue(actions,
1187                                                               action_flags, dev,
1188                                                               attr,
1189                                                               error);
1190                         if (ret < 0)
1191                                 return ret;
1192                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1193                         break;
1194                 case RTE_FLOW_ACTION_TYPE_RSS:
1195                         ret = mlx5_flow_validate_action_rss(actions,
1196                                                             action_flags, dev,
1197                                                             attr, item_flags,
1198                                                             error);
1199                         if (ret < 0)
1200                                 return ret;
1201                         action_flags |= MLX5_FLOW_ACTION_RSS;
1202                         break;
1203                 case RTE_FLOW_ACTION_TYPE_COUNT:
1204                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1205                         if (ret < 0)
1206                                 return ret;
1207                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1208                         break;
1209                 default:
1210                         return rte_flow_error_set(error, ENOTSUP,
1211                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1212                                                   actions,
1213                                                   "action not supported");
1214                 }
1215         }
1216         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1217                 return rte_flow_error_set(error, EINVAL,
1218                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1219                                           "no fate action is found");
1220         return 0;
1221 }
1222
1223 /**
1224  * Calculate the required bytes that are needed for the action part of the verbs
1225  * flow.
1226  *
1227  * @param[in] actions
1228  *   Pointer to the list of actions.
1229  *
1230  * @return
1231  *   The size of the memory needed for all actions.
1232  */
1233 static int
1234 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1235 {
1236         int size = 0;
1237
1238         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1239                 switch (actions->type) {
1240                 case RTE_FLOW_ACTION_TYPE_VOID:
1241                         break;
1242                 case RTE_FLOW_ACTION_TYPE_FLAG:
1243                         size += sizeof(struct ibv_flow_spec_action_tag);
1244                         break;
1245                 case RTE_FLOW_ACTION_TYPE_MARK:
1246                         size += sizeof(struct ibv_flow_spec_action_tag);
1247                         break;
1248                 case RTE_FLOW_ACTION_TYPE_DROP:
1249                         size += sizeof(struct ibv_flow_spec_action_drop);
1250                         break;
1251                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1252                         break;
1253                 case RTE_FLOW_ACTION_TYPE_RSS:
1254                         break;
1255                 case RTE_FLOW_ACTION_TYPE_COUNT:
1256 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1257         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1258                         size += sizeof(struct ibv_flow_spec_counter_action);
1259 #endif
1260                         break;
1261                 default:
1262                         break;
1263                 }
1264         }
1265         return size;
1266 }
1267
1268 /**
1269  * Calculate the required bytes that are needed for the item part of the verbs
1270  * flow.
1271  *
1272  * @param[in] items
1273  *   Pointer to the list of items.
1274  *
1275  * @return
1276  *   The size of the memory needed for all items.
1277  */
1278 static int
1279 flow_verbs_get_items_size(const struct rte_flow_item items[])
1280 {
1281         int size = 0;
1282
1283         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1284                 switch (items->type) {
1285                 case RTE_FLOW_ITEM_TYPE_VOID:
1286                         break;
1287                 case RTE_FLOW_ITEM_TYPE_ETH:
1288                         size += sizeof(struct ibv_flow_spec_eth);
1289                         break;
1290                 case RTE_FLOW_ITEM_TYPE_VLAN:
1291                         size += sizeof(struct ibv_flow_spec_eth);
1292                         break;
1293                 case RTE_FLOW_ITEM_TYPE_IPV4:
1294                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1295                         break;
1296                 case RTE_FLOW_ITEM_TYPE_IPV6:
1297                         size += sizeof(struct ibv_flow_spec_ipv6);
1298                         break;
1299                 case RTE_FLOW_ITEM_TYPE_UDP:
1300                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1301                         break;
1302                 case RTE_FLOW_ITEM_TYPE_TCP:
1303                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1304                         break;
1305                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1306                         size += sizeof(struct ibv_flow_spec_tunnel);
1307                         break;
1308                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1309                         size += sizeof(struct ibv_flow_spec_tunnel);
1310                         break;
1311 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1312                 case RTE_FLOW_ITEM_TYPE_GRE:
1313                         size += sizeof(struct ibv_flow_spec_gre);
1314                         break;
1315                 case RTE_FLOW_ITEM_TYPE_MPLS:
1316                         size += sizeof(struct ibv_flow_spec_mpls);
1317                         break;
1318 #else
1319                 case RTE_FLOW_ITEM_TYPE_GRE:
1320                         size += sizeof(struct ibv_flow_spec_tunnel);
1321                         break;
1322 #endif
1323                 default:
1324                         break;
1325                 }
1326         }
1327         return size;
1328 }
1329
1330 /**
1331  * Internal preparation function. Allocate mlx5_flow with the required size.
1332  * The required size is calculate based on the actions and items. This function
1333  * also returns the detected actions and items for later use.
1334  *
1335  * @param[in] attr
1336  *   Pointer to the flow attributes.
1337  * @param[in] items
1338  *   Pointer to the list of items.
1339  * @param[in] actions
1340  *   Pointer to the list of actions.
1341  * @param[out] error
1342  *   Pointer to the error structure.
1343  *
1344  * @return
1345  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1346  *   is set.
1347  */
1348 static struct mlx5_flow *
1349 flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
1350                    const struct rte_flow_item items[],
1351                    const struct rte_flow_action actions[],
1352                    struct rte_flow_error *error)
1353 {
1354         uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
1355         struct mlx5_flow *flow;
1356
1357         size += flow_verbs_get_actions_size(actions);
1358         size += flow_verbs_get_items_size(items);
1359         flow = rte_calloc(__func__, 1, size, 0);
1360         if (!flow) {
1361                 rte_flow_error_set(error, ENOMEM,
1362                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1363                                    "not enough memory to create flow");
1364                 return NULL;
1365         }
1366         flow->verbs.attr = (void *)(flow + 1);
1367         flow->verbs.specs =
1368                 (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
1369         return flow;
1370 }
1371
1372 /**
1373  * Fill the flow with verb spec.
1374  *
1375  * @param[in] dev
1376  *   Pointer to Ethernet device.
1377  * @param[in, out] dev_flow
1378  *   Pointer to the mlx5 flow.
1379  * @param[in] attr
1380  *   Pointer to the flow attributes.
1381  * @param[in] items
1382  *   Pointer to the list of items.
1383  * @param[in] actions
1384  *   Pointer to the list of actions.
1385  * @param[out] error
1386  *   Pointer to the error structure.
1387  *
1388  * @return
1389  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1390  */
1391 static int
1392 flow_verbs_translate(struct rte_eth_dev *dev,
1393                      struct mlx5_flow *dev_flow,
1394                      const struct rte_flow_attr *attr,
1395                      const struct rte_flow_item items[],
1396                      const struct rte_flow_action actions[],
1397                      struct rte_flow_error *error)
1398 {
1399         struct rte_flow *flow = dev_flow->flow;
1400         uint64_t item_flags = 0;
1401         uint64_t action_flags = 0;
1402         uint64_t priority = attr->priority;
1403         uint32_t subpriority = 0;
1404         struct mlx5_priv *priv = dev->data->dev_private;
1405
1406         if (priority == MLX5_FLOW_PRIO_RSVD)
1407                 priority = priv->config.flow_prio - 1;
1408         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1409                 int ret;
1410
1411                 switch (actions->type) {
1412                 case RTE_FLOW_ACTION_TYPE_VOID:
1413                         break;
1414                 case RTE_FLOW_ACTION_TYPE_FLAG:
1415                         flow_verbs_translate_action_flag(dev_flow, actions);
1416                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1417                         break;
1418                 case RTE_FLOW_ACTION_TYPE_MARK:
1419                         flow_verbs_translate_action_mark(dev_flow, actions);
1420                         action_flags |= MLX5_FLOW_ACTION_MARK;
1421                         break;
1422                 case RTE_FLOW_ACTION_TYPE_DROP:
1423                         flow_verbs_translate_action_drop(dev_flow, actions);
1424                         action_flags |= MLX5_FLOW_ACTION_DROP;
1425                         break;
1426                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1427                         flow_verbs_translate_action_queue(dev_flow, actions);
1428                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1429                         break;
1430                 case RTE_FLOW_ACTION_TYPE_RSS:
1431                         flow_verbs_translate_action_rss(dev_flow, actions);
1432                         action_flags |= MLX5_FLOW_ACTION_RSS;
1433                         break;
1434                 case RTE_FLOW_ACTION_TYPE_COUNT:
1435                         ret = flow_verbs_translate_action_count(dev_flow,
1436                                                                 actions,
1437                                                                 dev, error);
1438                         if (ret < 0)
1439                                 return ret;
1440                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1441                         break;
1442                 default:
1443                         return rte_flow_error_set(error, ENOTSUP,
1444                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1445                                                   actions,
1446                                                   "action not supported");
1447                 }
1448         }
1449         flow->actions = action_flags;
1450         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1451                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1452
1453                 switch (items->type) {
1454                 case RTE_FLOW_ITEM_TYPE_VOID:
1455                         break;
1456                 case RTE_FLOW_ITEM_TYPE_ETH:
1457                         flow_verbs_translate_item_eth(dev_flow, items,
1458                                                       item_flags);
1459                         subpriority = MLX5_PRIORITY_MAP_L2;
1460                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1461                                                MLX5_FLOW_LAYER_OUTER_L2;
1462                         break;
1463                 case RTE_FLOW_ITEM_TYPE_VLAN:
1464                         flow_verbs_translate_item_vlan(dev_flow, items,
1465                                                        item_flags);
1466                         subpriority = MLX5_PRIORITY_MAP_L2;
1467                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1468                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1469                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1470                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1471                         break;
1472                 case RTE_FLOW_ITEM_TYPE_IPV4:
1473                         flow_verbs_translate_item_ipv4(dev_flow, items,
1474                                                        item_flags);
1475                         subpriority = MLX5_PRIORITY_MAP_L3;
1476                         dev_flow->verbs.hash_fields |=
1477                                 mlx5_flow_hashfields_adjust
1478                                         (dev_flow, tunnel,
1479                                          MLX5_IPV4_LAYER_TYPES,
1480                                          MLX5_IPV4_IBV_RX_HASH);
1481                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1482                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1483                         break;
1484                 case RTE_FLOW_ITEM_TYPE_IPV6:
1485                         flow_verbs_translate_item_ipv6(dev_flow, items,
1486                                                        item_flags);
1487                         subpriority = MLX5_PRIORITY_MAP_L3;
1488                         dev_flow->verbs.hash_fields |=
1489                                 mlx5_flow_hashfields_adjust
1490                                         (dev_flow, tunnel,
1491                                          MLX5_IPV6_LAYER_TYPES,
1492                                          MLX5_IPV6_IBV_RX_HASH);
1493                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1494                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1495                         break;
1496                 case RTE_FLOW_ITEM_TYPE_TCP:
1497                         flow_verbs_translate_item_tcp(dev_flow, items,
1498                                                       item_flags);
1499                         subpriority = MLX5_PRIORITY_MAP_L4;
1500                         dev_flow->verbs.hash_fields |=
1501                                 mlx5_flow_hashfields_adjust
1502                                         (dev_flow, tunnel, ETH_RSS_TCP,
1503                                          (IBV_RX_HASH_SRC_PORT_TCP |
1504                                           IBV_RX_HASH_DST_PORT_TCP));
1505                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1506                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1507                         break;
1508                 case RTE_FLOW_ITEM_TYPE_UDP:
1509                         flow_verbs_translate_item_udp(dev_flow, items,
1510                                                       item_flags);
1511                         subpriority = MLX5_PRIORITY_MAP_L4;
1512                         dev_flow->verbs.hash_fields |=
1513                                 mlx5_flow_hashfields_adjust
1514                                         (dev_flow, tunnel, ETH_RSS_UDP,
1515                                          (IBV_RX_HASH_SRC_PORT_UDP |
1516                                           IBV_RX_HASH_DST_PORT_UDP));
1517                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1518                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1519                         break;
1520                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1521                         flow_verbs_translate_item_vxlan(dev_flow, items,
1522                                                         item_flags);
1523                         subpriority = MLX5_PRIORITY_MAP_L2;
1524                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1525                         break;
1526                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1527                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1528                                                             item_flags);
1529                         subpriority = MLX5_PRIORITY_MAP_L2;
1530                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1531                         break;
1532                 case RTE_FLOW_ITEM_TYPE_GRE:
1533                         flow_verbs_translate_item_gre(dev_flow, items,
1534                                                       item_flags);
1535                         subpriority = MLX5_PRIORITY_MAP_L2;
1536                         item_flags |= MLX5_FLOW_LAYER_GRE;
1537                         break;
1538                 case RTE_FLOW_ITEM_TYPE_MPLS:
1539                         flow_verbs_translate_item_mpls(dev_flow, items,
1540                                                        item_flags);
1541                         subpriority = MLX5_PRIORITY_MAP_L2;
1542                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1543                         break;
1544                 default:
1545                         return rte_flow_error_set(error, ENOTSUP,
1546                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1547                                                   NULL,
1548                                                   "item not supported");
1549                 }
1550         }
1551         dev_flow->layers = item_flags;
1552         dev_flow->verbs.attr->priority =
1553                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1554         dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
1555         return 0;
1556 }
1557
1558 /**
1559  * Remove the flow from the NIC but keeps it in memory.
1560  *
1561  * @param[in] dev
1562  *   Pointer to the Ethernet device structure.
1563  * @param[in, out] flow
1564  *   Pointer to flow structure.
1565  */
1566 static void
1567 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1568 {
1569         struct mlx5_flow_verbs *verbs;
1570         struct mlx5_flow *dev_flow;
1571
1572         if (!flow)
1573                 return;
1574         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1575                 verbs = &dev_flow->verbs;
1576                 if (verbs->flow) {
1577                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1578                         verbs->flow = NULL;
1579                 }
1580                 if (verbs->hrxq) {
1581                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1582                                 mlx5_hrxq_drop_release(dev);
1583                         else
1584                                 mlx5_hrxq_release(dev, verbs->hrxq);
1585                         verbs->hrxq = NULL;
1586                 }
1587         }
1588 }
1589
1590 /**
1591  * Remove the flow from the NIC and the memory.
1592  *
1593  * @param[in] dev
1594  *   Pointer to the Ethernet device structure.
1595  * @param[in, out] flow
1596  *   Pointer to flow structure.
1597  */
1598 static void
1599 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1600 {
1601         struct mlx5_flow *dev_flow;
1602
1603         if (!flow)
1604                 return;
1605         flow_verbs_remove(dev, flow);
1606         while (!LIST_EMPTY(&flow->dev_flows)) {
1607                 dev_flow = LIST_FIRST(&flow->dev_flows);
1608                 LIST_REMOVE(dev_flow, next);
1609                 rte_free(dev_flow);
1610         }
1611         if (flow->counter) {
1612                 flow_verbs_counter_release(flow->counter);
1613                 flow->counter = NULL;
1614         }
1615 }
1616
1617 /**
1618  * Apply the flow to the NIC.
1619  *
1620  * @param[in] dev
1621  *   Pointer to the Ethernet device structure.
1622  * @param[in, out] flow
1623  *   Pointer to flow structure.
1624  * @param[out] error
1625  *   Pointer to error structure.
1626  *
1627  * @return
1628  *   0 on success, a negative errno value otherwise and rte_errno is set.
1629  */
1630 static int
1631 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1632                  struct rte_flow_error *error)
1633 {
1634         struct mlx5_flow_verbs *verbs;
1635         struct mlx5_flow *dev_flow;
1636         int err;
1637
1638         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1639                 verbs = &dev_flow->verbs;
1640                 if (flow->actions & MLX5_FLOW_ACTION_DROP) {
1641                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1642                         if (!verbs->hrxq) {
1643                                 rte_flow_error_set
1644                                         (error, errno,
1645                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1646                                          "cannot get drop hash queue");
1647                                 goto error;
1648                         }
1649                 } else {
1650                         struct mlx5_hrxq *hrxq;
1651
1652                         hrxq = mlx5_hrxq_get(dev, flow->key,
1653                                              MLX5_RSS_HASH_KEY_LEN,
1654                                              verbs->hash_fields,
1655                                              (*flow->queue),
1656                                              flow->rss.queue_num);
1657                         if (!hrxq)
1658                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1659                                                      MLX5_RSS_HASH_KEY_LEN,
1660                                                      verbs->hash_fields,
1661                                                      (*flow->queue),
1662                                                      flow->rss.queue_num,
1663                                                      !!(dev_flow->layers &
1664                                                       MLX5_FLOW_LAYER_TUNNEL));
1665                         if (!hrxq) {
1666                                 rte_flow_error_set
1667                                         (error, rte_errno,
1668                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1669                                          "cannot get hash queue");
1670                                 goto error;
1671                         }
1672                         verbs->hrxq = hrxq;
1673                 }
1674                 verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
1675                                                      verbs->attr);
1676                 if (!verbs->flow) {
1677                         rte_flow_error_set(error, errno,
1678                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1679                                            NULL,
1680                                            "hardware refuses to create flow");
1681                         goto error;
1682                 }
1683         }
1684         return 0;
1685 error:
1686         err = rte_errno; /* Save rte_errno before cleanup. */
1687         LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
1688                 verbs = &dev_flow->verbs;
1689                 if (verbs->hrxq) {
1690                         if (flow->actions & MLX5_FLOW_ACTION_DROP)
1691                                 mlx5_hrxq_drop_release(dev);
1692                         else
1693                                 mlx5_hrxq_release(dev, verbs->hrxq);
1694                         verbs->hrxq = NULL;
1695                 }
1696         }
1697         rte_errno = err; /* Restore rte_errno. */
1698         return -rte_errno;
1699 }
1700
1701 /**
1702  * Query a flow.
1703  *
1704  * @see rte_flow_query()
1705  * @see rte_flow_ops
1706  */
1707 static int
1708 flow_verbs_query(struct rte_eth_dev *dev,
1709                  struct rte_flow *flow,
1710                  const struct rte_flow_action *actions,
1711                  void *data,
1712                  struct rte_flow_error *error)
1713 {
1714         int ret = -EINVAL;
1715
1716         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1717                 switch (actions->type) {
1718                 case RTE_FLOW_ACTION_TYPE_VOID:
1719                         break;
1720                 case RTE_FLOW_ACTION_TYPE_COUNT:
1721                         ret = flow_verbs_counter_query(dev, flow, data, error);
1722                         break;
1723                 default:
1724                         return rte_flow_error_set(error, ENOTSUP,
1725                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1726                                                   actions,
1727                                                   "action not supported");
1728                 }
1729         }
1730         return ret;
1731 }
1732
1733 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1734         .validate = flow_verbs_validate,
1735         .prepare = flow_verbs_prepare,
1736         .translate = flow_verbs_translate,
1737         .apply = flow_verbs_apply,
1738         .remove = flow_verbs_remove,
1739         .destroy = flow_verbs_destroy,
1740         .query = flow_verbs_query,
1741 };