net/mlx5: fix packet length assert in MPRQ
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include <mlx5_glue.h>
30 #include <mlx5_prm.h>
31
32 #include "mlx5_defs.h"
33 #include "mlx5.h"
34 #include "mlx5_flow.h"
35 #include "mlx5_rxtx.h"
36
/* Yield IBV_FLOW_SPEC_INNER when the tunnel layer bit is set in item_flags. */
#define VERBS_SPEC_INNER(item_flags) \
        (((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
39
40 /**
41  * Get Verbs flow counter by index.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in] idx
46  *   mlx5 flow counter index in the container.
47  * @param[out] ppool
48  *   mlx5 flow counter pool in the container,
49  *
50  * @return
51  *   A pointer to the counter, NULL otherwise.
52  */
53 static struct mlx5_flow_counter *
54 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
55                               uint32_t idx,
56                               struct mlx5_flow_counter_pool **ppool)
57 {
58         struct mlx5_priv *priv = dev->data->dev_private;
59         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
60         struct mlx5_flow_counter_pool *pool;
61
62         idx--;
63         pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
64         MLX5_ASSERT(pool);
65         if (ppool)
66                 *ppool = pool;
67         return &pool->counters_raw[idx % MLX5_COUNTERS_PER_POOL];
68 }
69
70 /**
71  * Create Verbs flow counter with Verbs library.
72  *
73  * @param[in] dev
74  *   Pointer to the Ethernet device structure.
75  * @param[in, out] counter
76  *   mlx5 flow counter object, contains the counter id,
77  *   handle of created Verbs flow counter is returned
78  *   in cs field (if counters are supported).
79  *
80  * @return
81  *   0 On success else a negative errno value is returned
82  *   and rte_errno is set.
83  */
84 static int
85 flow_verbs_counter_create(struct rte_eth_dev *dev,
86                           struct mlx5_flow_counter_ext *counter)
87 {
88 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
89         struct mlx5_priv *priv = dev->data->dev_private;
90         struct ibv_context *ctx = priv->sh->ctx;
91         struct ibv_counter_set_init_attr init = {
92                          .counter_set_id = counter->id};
93
94         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
95         if (!counter->cs) {
96                 rte_errno = ENOTSUP;
97                 return -ENOTSUP;
98         }
99         return 0;
100 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
101         struct mlx5_priv *priv = dev->data->dev_private;
102         struct ibv_context *ctx = priv->sh->ctx;
103         struct ibv_counters_init_attr init = {0};
104         struct ibv_counter_attach_attr attach;
105         int ret;
106
107         memset(&attach, 0, sizeof(attach));
108         counter->cs = mlx5_glue->create_counters(ctx, &init);
109         if (!counter->cs) {
110                 rte_errno = ENOTSUP;
111                 return -ENOTSUP;
112         }
113         attach.counter_desc = IBV_COUNTER_PACKETS;
114         attach.index = 0;
115         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
116         if (!ret) {
117                 attach.counter_desc = IBV_COUNTER_BYTES;
118                 attach.index = 1;
119                 ret = mlx5_glue->attach_counters
120                                         (counter->cs, &attach, NULL);
121         }
122         if (ret) {
123                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
124                 counter->cs = NULL;
125                 rte_errno = ret;
126                 return -ret;
127         }
128         return 0;
129 #else
130         (void)dev;
131         (void)counter;
132         rte_errno = ENOTSUP;
133         return -ENOTSUP;
134 #endif
135 }
136
137 /**
138  * Get a flow counter.
139  *
140  * @param[in] dev
141  *   Pointer to the Ethernet device structure.
142  * @param[in] shared
143  *   Indicate if this counter is shared with other flows.
144  * @param[in] id
145  *   Counter identifier.
146  *
147  * @return
148  *   Index to the counter, 0 otherwise and rte_errno is set.
149  */
150 static uint32_t
151 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
152 {
153         struct mlx5_priv *priv = dev->data->dev_private;
154         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
155         struct mlx5_flow_counter_pool *pool = NULL;
156         struct mlx5_flow_counter_ext *cnt_ext = NULL;
157         struct mlx5_flow_counter *cnt = NULL;
158         uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
159         uint32_t pool_idx;
160         uint32_t i;
161         int ret;
162
163         if (shared) {
164                 for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
165                         pool = cont->pools[pool_idx];
166                         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
167                                 cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
168                                 if (cnt_ext->shared && cnt_ext->id == id) {
169                                         cnt_ext->ref_cnt++;
170                                         return MLX5_MAKE_CNT_IDX(pool_idx, i);
171                                 }
172                         }
173                 }
174         }
175         for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
176                 pool = cont->pools[pool_idx];
177                 if (!pool)
178                         continue;
179                 cnt = TAILQ_FIRST(&pool->counters);
180                 if (cnt)
181                         break;
182         }
183         if (!cnt) {
184                 struct mlx5_flow_counter_pool **pools;
185                 uint32_t size;
186
187                 if (n_valid == cont->n) {
188                         /* Resize the container pool array. */
189                         size = sizeof(struct mlx5_flow_counter_pool *) *
190                                      (n_valid + MLX5_CNT_CONTAINER_RESIZE);
191                         pools = rte_zmalloc(__func__, size, 0);
192                         if (!pools)
193                                 return 0;
194                         if (n_valid) {
195                                 memcpy(pools, cont->pools,
196                                        sizeof(struct mlx5_flow_counter_pool *) *
197                                        n_valid);
198                                 rte_free(cont->pools);
199                         }
200                         cont->pools = pools;
201                         cont->n += MLX5_CNT_CONTAINER_RESIZE;
202                 }
203                 /* Allocate memory for new pool*/
204                 size = sizeof(*pool) + sizeof(*cnt_ext) *
205                        MLX5_COUNTERS_PER_POOL;
206                 pool = rte_calloc(__func__, 1, size, 0);
207                 if (!pool)
208                         return 0;
209                 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
210                         cnt = &pool->counters_raw[i];
211                         TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
212                 }
213                 cnt = &pool->counters_raw[0];
214                 cont->pools[n_valid] = pool;
215                 pool_idx = n_valid;
216                 rte_atomic16_add(&cont->n_valid, 1);
217                 TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
218         }
219         i = cnt - pool->counters_raw;
220         cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
221         cnt_ext->id = id;
222         cnt_ext->shared = shared;
223         cnt_ext->ref_cnt = 1;
224         cnt->hits = 0;
225         cnt->bytes = 0;
226         /* Create counter with Verbs. */
227         ret = flow_verbs_counter_create(dev, cnt_ext);
228         if (!ret) {
229                 TAILQ_REMOVE(&pool->counters, cnt, next);
230                 return MLX5_MAKE_CNT_IDX(pool_idx, i);
231         }
232         /* Some error occurred in Verbs library. */
233         rte_errno = -ret;
234         return 0;
235 }
236
237 /**
238  * Release a flow counter.
239  *
240  * @param[in] dev
241  *   Pointer to the Ethernet device structure.
242  * @param[in] counter
243  *   Index to the counter handler.
244  */
245 static void
246 flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
247 {
248         struct mlx5_flow_counter_pool *pool;
249         struct mlx5_flow_counter *cnt;
250         struct mlx5_flow_counter_ext *cnt_ext;
251
252         cnt = flow_verbs_counter_get_by_idx(dev, counter,
253                                             &pool);
254         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
255         if (--cnt_ext->ref_cnt == 0) {
256 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
257                 claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
258                 cnt_ext->cs = NULL;
259 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
260                 claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
261                 cnt_ext->cs = NULL;
262 #endif
263                 TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
264         }
265 }
266
267 /**
268  * Query a flow counter via Verbs library call.
269  *
270  * @see rte_flow_query()
271  * @see rte_flow_ops
272  */
273 static int
274 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
275                          struct rte_flow *flow, void *data,
276                          struct rte_flow_error *error)
277 {
278 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
279         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
280         if (flow->counter) {
281                 struct mlx5_flow_counter_pool *pool;
282                 struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
283                                                 (dev, flow->counter, &pool);
284                 struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
285                                                         (pool, cnt);
286                 struct rte_flow_query_count *qc = data;
287                 uint64_t counters[2] = {0, 0};
288 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
289                 struct ibv_query_counter_set_attr query_cs_attr = {
290                         .cs = cnt_ext->cs,
291                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
292                 };
293                 struct ibv_counter_set_data query_out = {
294                         .out = counters,
295                         .outlen = 2 * sizeof(uint64_t),
296                 };
297                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
298                                                        &query_out);
299 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
300                 int err = mlx5_glue->query_counters
301                                (cnt_ext->cs, counters,
302                                 RTE_DIM(counters),
303                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
304 #endif
305                 if (err)
306                         return rte_flow_error_set
307                                 (error, err,
308                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
309                                  NULL,
310                                  "cannot read counter");
311                 qc->hits_set = 1;
312                 qc->bytes_set = 1;
313                 qc->hits = counters[0] - cnt->hits;
314                 qc->bytes = counters[1] - cnt->bytes;
315                 if (qc->reset) {
316                         cnt->hits = counters[0];
317                         cnt->bytes = counters[1];
318                 }
319                 return 0;
320         }
321         return rte_flow_error_set(error, EINVAL,
322                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
323                                   NULL,
324                                   "flow does not have counter");
325 #else
326         (void)flow;
327         (void)data;
328         return rte_flow_error_set(error, ENOTSUP,
329                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
330                                   NULL,
331                                   "counters are not available");
332 #endif
333 }
334
335 /**
336  * Add a verbs item specification into @p verbs.
337  *
338  * @param[out] verbs
339  *   Pointer to verbs structure.
340  * @param[in] src
341  *   Create specification.
342  * @param[in] size
343  *   Size in bytes of the specification to copy.
344  */
345 static void
346 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
347                     void *src, unsigned int size)
348 {
349         void *dst;
350
351         if (!verbs)
352                 return;
353         MLX5_ASSERT(verbs->specs);
354         dst = (void *)(verbs->specs + verbs->size);
355         memcpy(dst, src, size);
356         ++verbs->attr.num_of_specs;
357         verbs->size += size;
358 }
359
360 /**
361  * Convert the @p item into a Verbs specification. This function assumes that
362  * the input is valid and that there is space to insert the requested item
363  * into the flow.
364  *
365  * @param[in, out] dev_flow
366  *   Pointer to dev_flow structure.
367  * @param[in] item
368  *   Item specification.
369  * @param[in] item_flags
370  *   Parsed item flags.
371  */
372 static void
373 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
374                               const struct rte_flow_item *item,
375                               uint64_t item_flags)
376 {
377         const struct rte_flow_item_eth *spec = item->spec;
378         const struct rte_flow_item_eth *mask = item->mask;
379         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
380         struct ibv_flow_spec_eth eth = {
381                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
382                 .size = size,
383         };
384
385         if (!mask)
386                 mask = &rte_flow_item_eth_mask;
387         if (spec) {
388                 unsigned int i;
389
390                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
391                         RTE_ETHER_ADDR_LEN);
392                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
393                         RTE_ETHER_ADDR_LEN);
394                 eth.val.ether_type = spec->type;
395                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
396                         RTE_ETHER_ADDR_LEN);
397                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
398                         RTE_ETHER_ADDR_LEN);
399                 eth.mask.ether_type = mask->type;
400                 /* Remove unwanted bits from values. */
401                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
402                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
403                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
404                 }
405                 eth.val.ether_type &= eth.mask.ether_type;
406         }
407         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
408 }
409
410 /**
411  * Update the VLAN tag in the Verbs Ethernet specification.
412  * This function assumes that the input is valid and there is space to add
413  * the requested item.
414  *
415  * @param[in, out] attr
416  *   Pointer to Verbs attributes structure.
417  * @param[in] eth
418  *   Verbs structure containing the VLAN information to copy.
419  */
420 static void
421 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
422                             struct ibv_flow_spec_eth *eth)
423 {
424         unsigned int i;
425         const enum ibv_flow_spec_type search = eth->type;
426         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
427                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
428
429         for (i = 0; i != attr->num_of_specs; ++i) {
430                 if (hdr->type == search) {
431                         struct ibv_flow_spec_eth *e =
432                                 (struct ibv_flow_spec_eth *)hdr;
433
434                         e->val.vlan_tag = eth->val.vlan_tag;
435                         e->mask.vlan_tag = eth->mask.vlan_tag;
436                         e->val.ether_type = eth->val.ether_type;
437                         e->mask.ether_type = eth->mask.ether_type;
438                         break;
439                 }
440                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
441         }
442 }
443
444 /**
445  * Convert the @p item into a Verbs specification. This function assumes that
446  * the input is valid and that there is space to insert the requested item
447  * into the flow.
448  *
449  * @param[in, out] dev_flow
450  *   Pointer to dev_flow structure.
451  * @param[in] item
452  *   Item specification.
453  * @param[in] item_flags
454  *   Parsed item flags.
455  */
456 static void
457 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
458                                const struct rte_flow_item *item,
459                                uint64_t item_flags)
460 {
461         const struct rte_flow_item_vlan *spec = item->spec;
462         const struct rte_flow_item_vlan *mask = item->mask;
463         unsigned int size = sizeof(struct ibv_flow_spec_eth);
464         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
465         struct ibv_flow_spec_eth eth = {
466                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
467                 .size = size,
468         };
469         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
470                                       MLX5_FLOW_LAYER_OUTER_L2;
471
472         if (!mask)
473                 mask = &rte_flow_item_vlan_mask;
474         if (spec) {
475                 eth.val.vlan_tag = spec->tci;
476                 eth.mask.vlan_tag = mask->tci;
477                 eth.val.vlan_tag &= eth.mask.vlan_tag;
478                 eth.val.ether_type = spec->inner_type;
479                 eth.mask.ether_type = mask->inner_type;
480                 eth.val.ether_type &= eth.mask.ether_type;
481         }
482         if (!(item_flags & l2m))
483                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
484         else
485                 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
486         if (!tunnel)
487                 dev_flow->handle->vf_vlan.tag =
488                         rte_be_to_cpu_16(spec->tci) & 0x0fff;
489 }
490
491 /**
492  * Convert the @p item into a Verbs specification. This function assumes that
493  * the input is valid and that there is space to insert the requested item
494  * into the flow.
495  *
496  * @param[in, out] dev_flow
497  *   Pointer to dev_flow structure.
498  * @param[in] item
499  *   Item specification.
500  * @param[in] item_flags
501  *   Parsed item flags.
502  */
503 static void
504 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
505                                const struct rte_flow_item *item,
506                                uint64_t item_flags)
507 {
508         const struct rte_flow_item_ipv4 *spec = item->spec;
509         const struct rte_flow_item_ipv4 *mask = item->mask;
510         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
511         struct ibv_flow_spec_ipv4_ext ipv4 = {
512                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
513                 .size = size,
514         };
515
516         if (!mask)
517                 mask = &rte_flow_item_ipv4_mask;
518         if (spec) {
519                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
520                         .src_ip = spec->hdr.src_addr,
521                         .dst_ip = spec->hdr.dst_addr,
522                         .proto = spec->hdr.next_proto_id,
523                         .tos = spec->hdr.type_of_service,
524                 };
525                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
526                         .src_ip = mask->hdr.src_addr,
527                         .dst_ip = mask->hdr.dst_addr,
528                         .proto = mask->hdr.next_proto_id,
529                         .tos = mask->hdr.type_of_service,
530                 };
531                 /* Remove unwanted bits from values. */
532                 ipv4.val.src_ip &= ipv4.mask.src_ip;
533                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
534                 ipv4.val.proto &= ipv4.mask.proto;
535                 ipv4.val.tos &= ipv4.mask.tos;
536         }
537         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
538 }
539
540 /**
541  * Convert the @p item into a Verbs specification. This function assumes that
542  * the input is valid and that there is space to insert the requested item
543  * into the flow.
544  *
545  * @param[in, out] dev_flow
546  *   Pointer to dev_flow structure.
547  * @param[in] item
548  *   Item specification.
549  * @param[in] item_flags
550  *   Parsed item flags.
551  */
552 static void
553 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
554                                const struct rte_flow_item *item,
555                                uint64_t item_flags)
556 {
557         const struct rte_flow_item_ipv6 *spec = item->spec;
558         const struct rte_flow_item_ipv6 *mask = item->mask;
559         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
560         struct ibv_flow_spec_ipv6 ipv6 = {
561                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
562                 .size = size,
563         };
564
565         if (!mask)
566                 mask = &rte_flow_item_ipv6_mask;
567         if (spec) {
568                 unsigned int i;
569                 uint32_t vtc_flow_val;
570                 uint32_t vtc_flow_mask;
571
572                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
573                        RTE_DIM(ipv6.val.src_ip));
574                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
575                        RTE_DIM(ipv6.val.dst_ip));
576                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
577                        RTE_DIM(ipv6.mask.src_ip));
578                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
579                        RTE_DIM(ipv6.mask.dst_ip));
580                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
581                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
582                 ipv6.val.flow_label =
583                         rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
584                                          RTE_IPV6_HDR_FL_SHIFT);
585                 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
586                                          RTE_IPV6_HDR_TC_SHIFT;
587                 ipv6.val.next_hdr = spec->hdr.proto;
588                 ipv6.mask.flow_label =
589                         rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
590                                          RTE_IPV6_HDR_FL_SHIFT);
591                 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
592                                           RTE_IPV6_HDR_TC_SHIFT;
593                 ipv6.mask.next_hdr = mask->hdr.proto;
594                 /* Remove unwanted bits from values. */
595                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
596                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
597                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
598                 }
599                 ipv6.val.flow_label &= ipv6.mask.flow_label;
600                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
601                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
602         }
603         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
604 }
605
606 /**
607  * Convert the @p item into a Verbs specification. This function assumes that
608  * the input is valid and that there is space to insert the requested item
609  * into the flow.
610  *
611  * @param[in, out] dev_flow
612  *   Pointer to dev_flow structure.
613  * @param[in] item
614  *   Item specification.
615  * @param[in] item_flags
616  *   Parsed item flags.
617  */
618 static void
619 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
620                               const struct rte_flow_item *item,
621                               uint64_t item_flags __rte_unused)
622 {
623         const struct rte_flow_item_tcp *spec = item->spec;
624         const struct rte_flow_item_tcp *mask = item->mask;
625         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
626         struct ibv_flow_spec_tcp_udp tcp = {
627                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
628                 .size = size,
629         };
630
631         if (!mask)
632                 mask = &rte_flow_item_tcp_mask;
633         if (spec) {
634                 tcp.val.dst_port = spec->hdr.dst_port;
635                 tcp.val.src_port = spec->hdr.src_port;
636                 tcp.mask.dst_port = mask->hdr.dst_port;
637                 tcp.mask.src_port = mask->hdr.src_port;
638                 /* Remove unwanted bits from values. */
639                 tcp.val.src_port &= tcp.mask.src_port;
640                 tcp.val.dst_port &= tcp.mask.dst_port;
641         }
642         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
643 }
644
645 /**
646  * Convert the @p item into a Verbs specification. This function assumes that
647  * the input is valid and that there is space to insert the requested item
648  * into the flow.
649  *
650  * @param[in, out] dev_flow
651  *   Pointer to dev_flow structure.
652  * @param[in] item
653  *   Item specification.
654  * @param[in] item_flags
655  *   Parsed item flags.
656  */
657 static void
658 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
659                               const struct rte_flow_item *item,
660                               uint64_t item_flags __rte_unused)
661 {
662         const struct rte_flow_item_udp *spec = item->spec;
663         const struct rte_flow_item_udp *mask = item->mask;
664         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
665         struct ibv_flow_spec_tcp_udp udp = {
666                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
667                 .size = size,
668         };
669
670         if (!mask)
671                 mask = &rte_flow_item_udp_mask;
672         if (spec) {
673                 udp.val.dst_port = spec->hdr.dst_port;
674                 udp.val.src_port = spec->hdr.src_port;
675                 udp.mask.dst_port = mask->hdr.dst_port;
676                 udp.mask.src_port = mask->hdr.src_port;
677                 /* Remove unwanted bits from values. */
678                 udp.val.src_port &= udp.mask.src_port;
679                 udp.val.dst_port &= udp.mask.dst_port;
680         }
681         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
682 }
683
684 /**
685  * Convert the @p item into a Verbs specification. This function assumes that
686  * the input is valid and that there is space to insert the requested item
687  * into the flow.
688  *
689  * @param[in, out] dev_flow
690  *   Pointer to dev_flow structure.
691  * @param[in] item
692  *   Item specification.
693  * @param[in] item_flags
694  *   Parsed item flags.
695  */
696 static void
697 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
698                                 const struct rte_flow_item *item,
699                                 uint64_t item_flags __rte_unused)
700 {
701         const struct rte_flow_item_vxlan *spec = item->spec;
702         const struct rte_flow_item_vxlan *mask = item->mask;
703         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
704         struct ibv_flow_spec_tunnel vxlan = {
705                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
706                 .size = size,
707         };
708         union vni {
709                 uint32_t vlan_id;
710                 uint8_t vni[4];
711         } id = { .vlan_id = 0, };
712
713         if (!mask)
714                 mask = &rte_flow_item_vxlan_mask;
715         if (spec) {
716                 memcpy(&id.vni[1], spec->vni, 3);
717                 vxlan.val.tunnel_id = id.vlan_id;
718                 memcpy(&id.vni[1], mask->vni, 3);
719                 vxlan.mask.tunnel_id = id.vlan_id;
720                 /* Remove unwanted bits from values. */
721                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
722         }
723         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
724 }
725
726 /**
727  * Convert the @p item into a Verbs specification. This function assumes that
728  * the input is valid and that there is space to insert the requested item
729  * into the flow.
730  *
731  * @param[in, out] dev_flow
732  *   Pointer to dev_flow structure.
733  * @param[in] item
734  *   Item specification.
735  * @param[in] item_flags
736  *   Parsed item flags.
737  */
738 static void
739 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
740                                     const struct rte_flow_item *item,
741                                     uint64_t item_flags __rte_unused)
742 {
743         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
744         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
745         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
746         struct ibv_flow_spec_tunnel vxlan_gpe = {
747                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
748                 .size = size,
749         };
750         union vni {
751                 uint32_t vlan_id;
752                 uint8_t vni[4];
753         } id = { .vlan_id = 0, };
754
755         if (!mask)
756                 mask = &rte_flow_item_vxlan_gpe_mask;
757         if (spec) {
758                 memcpy(&id.vni[1], spec->vni, 3);
759                 vxlan_gpe.val.tunnel_id = id.vlan_id;
760                 memcpy(&id.vni[1], mask->vni, 3);
761                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
762                 /* Remove unwanted bits from values. */
763                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
764         }
765         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
766 }
767
768 /**
769  * Update the protocol in Verbs IPv4/IPv6 spec.
770  *
771  * @param[in, out] attr
772  *   Pointer to Verbs attributes structure.
773  * @param[in] search
774  *   Specification type to search in order to update the IP protocol.
775  * @param[in] protocol
776  *   Protocol value to set if none is present in the specification.
777  */
778 static void
779 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
780                                        enum ibv_flow_spec_type search,
781                                        uint8_t protocol)
782 {
783         unsigned int i;
784         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
785                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
786
787         if (!attr)
788                 return;
789         for (i = 0; i != attr->num_of_specs; ++i) {
790                 if (hdr->type == search) {
791                         union {
792                                 struct ibv_flow_spec_ipv4_ext *ipv4;
793                                 struct ibv_flow_spec_ipv6 *ipv6;
794                         } ip;
795
796                         switch (search) {
797                         case IBV_FLOW_SPEC_IPV4_EXT:
798                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
799                                 if (!ip.ipv4->val.proto) {
800                                         ip.ipv4->val.proto = protocol;
801                                         ip.ipv4->mask.proto = 0xff;
802                                 }
803                                 break;
804                         case IBV_FLOW_SPEC_IPV6:
805                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
806                                 if (!ip.ipv6->val.next_hdr) {
807                                         ip.ipv6->val.next_hdr = protocol;
808                                         ip.ipv6->mask.next_hdr = 0xff;
809                                 }
810                                 break;
811                         default:
812                                 break;
813                         }
814                         break;
815                 }
816                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
817         }
818 }
819
820 /**
821  * Convert the @p item into a Verbs specification. This function assumes that
822  * the input is valid and that there is space to insert the requested item
823  * into the flow.
824  *
825  * @param[in, out] dev_flow
826  *   Pointer to dev_flow structure.
827  * @param[in] item
828  *   Item specification.
829  * @param[in] item_flags
830  *   Parsed item flags.
831  */
832 static void
833 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
834                               const struct rte_flow_item *item __rte_unused,
835                               uint64_t item_flags)
836 {
837         struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
838 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
839         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
840         struct ibv_flow_spec_tunnel tunnel = {
841                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
842                 .size = size,
843         };
844 #else
845         const struct rte_flow_item_gre *spec = item->spec;
846         const struct rte_flow_item_gre *mask = item->mask;
847         unsigned int size = sizeof(struct ibv_flow_spec_gre);
848         struct ibv_flow_spec_gre tunnel = {
849                 .type = IBV_FLOW_SPEC_GRE,
850                 .size = size,
851         };
852
853         if (!mask)
854                 mask = &rte_flow_item_gre_mask;
855         if (spec) {
856                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
857                 tunnel.val.protocol = spec->protocol;
858                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
859                 tunnel.mask.protocol = mask->protocol;
860                 /* Remove unwanted bits from values. */
861                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
862                 tunnel.val.protocol &= tunnel.mask.protocol;
863                 tunnel.val.key &= tunnel.mask.key;
864         }
865 #endif
866         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
867                 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
868                                                        IBV_FLOW_SPEC_IPV4_EXT,
869                                                        IPPROTO_GRE);
870         else
871                 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
872                                                        IBV_FLOW_SPEC_IPV6,
873                                                        IPPROTO_GRE);
874         flow_verbs_spec_add(verbs, &tunnel, size);
875 }
876
877 /**
878  * Convert the @p action into a Verbs specification. This function assumes that
879  * the input is valid and that there is space to insert the requested action
880  * into the flow. This function also return the action that was added.
881  *
882  * @param[in, out] dev_flow
883  *   Pointer to dev_flow structure.
884  * @param[in] item
885  *   Item specification.
886  * @param[in] item_flags
887  *   Parsed item flags.
888  */
889 static void
890 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
891                                const struct rte_flow_item *item __rte_unused,
892                                uint64_t item_flags __rte_unused)
893 {
894 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
895         const struct rte_flow_item_mpls *spec = item->spec;
896         const struct rte_flow_item_mpls *mask = item->mask;
897         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
898         struct ibv_flow_spec_mpls mpls = {
899                 .type = IBV_FLOW_SPEC_MPLS,
900                 .size = size,
901         };
902
903         if (!mask)
904                 mask = &rte_flow_item_mpls_mask;
905         if (spec) {
906                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
907                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
908                 /* Remove unwanted bits from values.  */
909                 mpls.val.label &= mpls.mask.label;
910         }
911         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
912 #endif
913 }
914
915 /**
916  * Convert the @p action into a Verbs specification. This function assumes that
917  * the input is valid and that there is space to insert the requested action
918  * into the flow.
919  *
920  * @param[in] dev_flow
921  *   Pointer to mlx5_flow.
922  * @param[in] action
923  *   Action configuration.
924  */
925 static void
926 flow_verbs_translate_action_drop
927         (struct mlx5_flow *dev_flow,
928          const struct rte_flow_action *action __rte_unused)
929 {
930         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
931         struct ibv_flow_spec_action_drop drop = {
932                         .type = IBV_FLOW_SPEC_ACTION_DROP,
933                         .size = size,
934         };
935
936         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
937 }
938
939 /**
940  * Convert the @p action into a Verbs specification. This function assumes that
941  * the input is valid and that there is space to insert the requested action
942  * into the flow.
943  *
944  * @param[in] rss_desc
945  *   Pointer to mlx5_flow_rss_desc.
946  * @param[in] action
947  *   Action configuration.
948  */
949 static void
950 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
951                                   const struct rte_flow_action *action)
952 {
953         const struct rte_flow_action_queue *queue = action->conf;
954
955         rss_desc->queue[0] = queue->index;
956         rss_desc->queue_num = 1;
957 }
958
959 /**
960  * Convert the @p action into a Verbs specification. This function assumes that
961  * the input is valid and that there is space to insert the requested action
962  * into the flow.
963  *
964  * @param[in] rss_desc
965  *   Pointer to mlx5_flow_rss_desc.
966  * @param[in] action
967  *   Action configuration.
968  */
969 static void
970 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
971                                 const struct rte_flow_action *action)
972 {
973         const struct rte_flow_action_rss *rss = action->conf;
974         const uint8_t *rss_key;
975
976         memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
977         rss_desc->queue_num = rss->queue_num;
978         /* NULL RSS key indicates default RSS key. */
979         rss_key = !rss->key ? rss_hash_default_key : rss->key;
980         memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
981         /*
982          * rss->level and rss.types should be set in advance when expanding
983          * items for RSS.
984          */
985 }
986
987 /**
988  * Convert the @p action into a Verbs specification. This function assumes that
989  * the input is valid and that there is space to insert the requested action
990  * into the flow.
991  *
992  * @param[in] dev_flow
993  *   Pointer to mlx5_flow.
994  * @param[in] action
995  *   Action configuration.
996  */
997 static void
998 flow_verbs_translate_action_flag
999         (struct mlx5_flow *dev_flow,
1000          const struct rte_flow_action *action __rte_unused)
1001 {
1002         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1003         struct ibv_flow_spec_action_tag tag = {
1004                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1005                 .size = size,
1006                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1007         };
1008
1009         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1010 }
1011
1012 /**
1013  * Convert the @p action into a Verbs specification. This function assumes that
1014  * the input is valid and that there is space to insert the requested action
1015  * into the flow.
1016  *
1017  * @param[in] dev_flow
1018  *   Pointer to mlx5_flow.
1019  * @param[in] action
1020  *   Action configuration.
1021  */
1022 static void
1023 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1024                                  const struct rte_flow_action *action)
1025 {
1026         const struct rte_flow_action_mark *mark = action->conf;
1027         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1028         struct ibv_flow_spec_action_tag tag = {
1029                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1030                 .size = size,
1031                 .tag_id = mlx5_flow_mark_set(mark->id),
1032         };
1033
1034         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1035 }
1036
1037 /**
1038  * Convert the @p action into a Verbs specification. This function assumes that
1039  * the input is valid and that there is space to insert the requested action
1040  * into the flow.
1041  *
1042  * @param[in] dev
1043  *   Pointer to the Ethernet device structure.
1044  * @param[in] action
1045  *   Action configuration.
1046  * @param[in] dev_flow
1047  *   Pointer to mlx5_flow.
1048  * @param[out] error
1049  *   Pointer to error structure.
1050  *
1051  * @return
1052  *   0 On success else a negative errno value is returned and rte_errno is set.
1053  */
1054 static int
1055 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
1056                                   const struct rte_flow_action *action,
1057                                   struct rte_eth_dev *dev,
1058                                   struct rte_flow_error *error)
1059 {
1060         const struct rte_flow_action_count *count = action->conf;
1061         struct rte_flow *flow = dev_flow->flow;
1062 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1063         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1064         struct mlx5_flow_counter_pool *pool;
1065         struct mlx5_flow_counter *cnt = NULL;
1066         struct mlx5_flow_counter_ext *cnt_ext;
1067         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1068         struct ibv_flow_spec_counter_action counter = {
1069                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1070                 .size = size,
1071         };
1072 #endif
1073
1074         if (!flow->counter) {
1075                 flow->counter = flow_verbs_counter_new(dev, count->shared,
1076                                                        count->id);
1077                 if (!flow->counter)
1078                         return rte_flow_error_set(error, rte_errno,
1079                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1080                                                   action,
1081                                                   "cannot get counter"
1082                                                   " context.");
1083         }
1084 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
1085         cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1086         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1087         counter.counter_set_handle = cnt_ext->cs->handle;
1088         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1089 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1090         cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1091         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1092         counter.counters = cnt_ext->cs;
1093         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1094 #endif
1095         return 0;
1096 }
1097
1098 /**
1099  * Internal validation function. For validating both actions and items.
1100  *
1101  * @param[in] dev
1102  *   Pointer to the Ethernet device structure.
1103  * @param[in] attr
1104  *   Pointer to the flow attributes.
1105  * @param[in] items
1106  *   Pointer to the list of items.
1107  * @param[in] actions
1108  *   Pointer to the list of actions.
1109  * @param[in] external
1110  *   This flow rule is created by request external to PMD.
1111  * @param[in] hairpin
1112  *   Number of hairpin TX actions, 0 means classic flow.
1113  * @param[out] error
1114  *   Pointer to the error structure.
1115  *
1116  * @return
1117  *   0 on success, a negative errno value otherwise and rte_errno is set.
1118  */
1119 static int
1120 flow_verbs_validate(struct rte_eth_dev *dev,
1121                     const struct rte_flow_attr *attr,
1122                     const struct rte_flow_item items[],
1123                     const struct rte_flow_action actions[],
1124                     bool external __rte_unused,
1125                     int hairpin __rte_unused,
1126                     struct rte_flow_error *error)
1127 {
1128         int ret;
1129         uint64_t action_flags = 0;
1130         uint64_t item_flags = 0;
1131         uint64_t last_item = 0;
1132         uint8_t next_protocol = 0xff;
1133         uint16_t ether_type = 0;
1134
1135         if (items == NULL)
1136                 return -1;
1137         ret = mlx5_flow_validate_attributes(dev, attr, error);
1138         if (ret < 0)
1139                 return ret;
1140         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1141                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1142                 int ret = 0;
1143
1144                 switch (items->type) {
1145                 case RTE_FLOW_ITEM_TYPE_VOID:
1146                         break;
1147                 case RTE_FLOW_ITEM_TYPE_ETH:
1148                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1149                                                           error);
1150                         if (ret < 0)
1151                                 return ret;
1152                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1153                                              MLX5_FLOW_LAYER_OUTER_L2;
1154                         if (items->mask != NULL && items->spec != NULL) {
1155                                 ether_type =
1156                                         ((const struct rte_flow_item_eth *)
1157                                          items->spec)->type;
1158                                 ether_type &=
1159                                         ((const struct rte_flow_item_eth *)
1160                                          items->mask)->type;
1161                                 ether_type = rte_be_to_cpu_16(ether_type);
1162                         } else {
1163                                 ether_type = 0;
1164                         }
1165                         break;
1166                 case RTE_FLOW_ITEM_TYPE_VLAN:
1167                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1168                                                            dev, error);
1169                         if (ret < 0)
1170                                 return ret;
1171                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1172                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1173                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1174                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1175                         if (items->mask != NULL && items->spec != NULL) {
1176                                 ether_type =
1177                                         ((const struct rte_flow_item_vlan *)
1178                                          items->spec)->inner_type;
1179                                 ether_type &=
1180                                         ((const struct rte_flow_item_vlan *)
1181                                          items->mask)->inner_type;
1182                                 ether_type = rte_be_to_cpu_16(ether_type);
1183                         } else {
1184                                 ether_type = 0;
1185                         }
1186                         break;
1187                 case RTE_FLOW_ITEM_TYPE_IPV4:
1188                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1189                                                            last_item,
1190                                                            ether_type, NULL,
1191                                                            error);
1192                         if (ret < 0)
1193                                 return ret;
1194                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1195                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1196                         if (items->mask != NULL &&
1197                             ((const struct rte_flow_item_ipv4 *)
1198                              items->mask)->hdr.next_proto_id) {
1199                                 next_protocol =
1200                                         ((const struct rte_flow_item_ipv4 *)
1201                                          (items->spec))->hdr.next_proto_id;
1202                                 next_protocol &=
1203                                         ((const struct rte_flow_item_ipv4 *)
1204                                          (items->mask))->hdr.next_proto_id;
1205                         } else {
1206                                 /* Reset for inner layer. */
1207                                 next_protocol = 0xff;
1208                         }
1209                         break;
1210                 case RTE_FLOW_ITEM_TYPE_IPV6:
1211                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1212                                                            last_item,
1213                                                            ether_type, NULL,
1214                                                            error);
1215                         if (ret < 0)
1216                                 return ret;
1217                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1218                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1219                         if (items->mask != NULL &&
1220                             ((const struct rte_flow_item_ipv6 *)
1221                              items->mask)->hdr.proto) {
1222                                 next_protocol =
1223                                         ((const struct rte_flow_item_ipv6 *)
1224                                          items->spec)->hdr.proto;
1225                                 next_protocol &=
1226                                         ((const struct rte_flow_item_ipv6 *)
1227                                          items->mask)->hdr.proto;
1228                         } else {
1229                                 /* Reset for inner layer. */
1230                                 next_protocol = 0xff;
1231                         }
1232                         break;
1233                 case RTE_FLOW_ITEM_TYPE_UDP:
1234                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1235                                                           next_protocol,
1236                                                           error);
1237                         if (ret < 0)
1238                                 return ret;
1239                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1240                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1241                         break;
1242                 case RTE_FLOW_ITEM_TYPE_TCP:
1243                         ret = mlx5_flow_validate_item_tcp
1244                                                 (items, item_flags,
1245                                                  next_protocol,
1246                                                  &rte_flow_item_tcp_mask,
1247                                                  error);
1248                         if (ret < 0)
1249                                 return ret;
1250                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1251                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1252                         break;
1253                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1254                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1255                                                             error);
1256                         if (ret < 0)
1257                                 return ret;
1258                         last_item = MLX5_FLOW_LAYER_VXLAN;
1259                         break;
1260                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1261                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1262                                                                 item_flags,
1263                                                                 dev, error);
1264                         if (ret < 0)
1265                                 return ret;
1266                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1267                         break;
1268                 case RTE_FLOW_ITEM_TYPE_GRE:
1269                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1270                                                           next_protocol, error);
1271                         if (ret < 0)
1272                                 return ret;
1273                         last_item = MLX5_FLOW_LAYER_GRE;
1274                         break;
1275                 case RTE_FLOW_ITEM_TYPE_MPLS:
1276                         ret = mlx5_flow_validate_item_mpls(dev, items,
1277                                                            item_flags,
1278                                                            last_item, error);
1279                         if (ret < 0)
1280                                 return ret;
1281                         last_item = MLX5_FLOW_LAYER_MPLS;
1282                         break;
1283                 default:
1284                         return rte_flow_error_set(error, ENOTSUP,
1285                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1286                                                   NULL, "item not supported");
1287                 }
1288                 item_flags |= last_item;
1289         }
1290         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1291                 switch (actions->type) {
1292                 case RTE_FLOW_ACTION_TYPE_VOID:
1293                         break;
1294                 case RTE_FLOW_ACTION_TYPE_FLAG:
1295                         ret = mlx5_flow_validate_action_flag(action_flags,
1296                                                              attr,
1297                                                              error);
1298                         if (ret < 0)
1299                                 return ret;
1300                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1301                         break;
1302                 case RTE_FLOW_ACTION_TYPE_MARK:
1303                         ret = mlx5_flow_validate_action_mark(actions,
1304                                                              action_flags,
1305                                                              attr,
1306                                                              error);
1307                         if (ret < 0)
1308                                 return ret;
1309                         action_flags |= MLX5_FLOW_ACTION_MARK;
1310                         break;
1311                 case RTE_FLOW_ACTION_TYPE_DROP:
1312                         ret = mlx5_flow_validate_action_drop(action_flags,
1313                                                              attr,
1314                                                              error);
1315                         if (ret < 0)
1316                                 return ret;
1317                         action_flags |= MLX5_FLOW_ACTION_DROP;
1318                         break;
1319                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1320                         ret = mlx5_flow_validate_action_queue(actions,
1321                                                               action_flags, dev,
1322                                                               attr,
1323                                                               error);
1324                         if (ret < 0)
1325                                 return ret;
1326                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1327                         break;
1328                 case RTE_FLOW_ACTION_TYPE_RSS:
1329                         ret = mlx5_flow_validate_action_rss(actions,
1330                                                             action_flags, dev,
1331                                                             attr, item_flags,
1332                                                             error);
1333                         if (ret < 0)
1334                                 return ret;
1335                         action_flags |= MLX5_FLOW_ACTION_RSS;
1336                         break;
1337                 case RTE_FLOW_ACTION_TYPE_COUNT:
1338                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1339                         if (ret < 0)
1340                                 return ret;
1341                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1342                         break;
1343                 default:
1344                         return rte_flow_error_set(error, ENOTSUP,
1345                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1346                                                   actions,
1347                                                   "action not supported");
1348                 }
1349         }
1350         /*
1351          * Validate the drop action mutual exclusion with other actions.
1352          * Drop action is mutually-exclusive with any other action, except for
1353          * Count action.
1354          */
1355         if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1356             (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1357                 return rte_flow_error_set(error, EINVAL,
1358                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1359                                           "Drop action is mutually-exclusive "
1360                                           "with any other action, except for "
1361                                           "Count action");
1362         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1363                 return rte_flow_error_set(error, EINVAL,
1364                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1365                                           "no fate action is found");
1366         return 0;
1367 }
1368
1369 /**
1370  * Calculate the required bytes that are needed for the action part of the verbs
1371  * flow.
1372  *
1373  * @param[in] actions
1374  *   Pointer to the list of actions.
1375  *
1376  * @return
1377  *   The size of the memory needed for all actions.
1378  */
1379 static int
1380 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1381 {
1382         int size = 0;
1383
1384         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1385                 switch (actions->type) {
1386                 case RTE_FLOW_ACTION_TYPE_VOID:
1387                         break;
1388                 case RTE_FLOW_ACTION_TYPE_FLAG:
1389                         size += sizeof(struct ibv_flow_spec_action_tag);
1390                         break;
1391                 case RTE_FLOW_ACTION_TYPE_MARK:
1392                         size += sizeof(struct ibv_flow_spec_action_tag);
1393                         break;
1394                 case RTE_FLOW_ACTION_TYPE_DROP:
1395                         size += sizeof(struct ibv_flow_spec_action_drop);
1396                         break;
1397                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1398                         break;
1399                 case RTE_FLOW_ACTION_TYPE_RSS:
1400                         break;
1401                 case RTE_FLOW_ACTION_TYPE_COUNT:
1402 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1403         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1404                         size += sizeof(struct ibv_flow_spec_counter_action);
1405 #endif
1406                         break;
1407                 default:
1408                         break;
1409                 }
1410         }
1411         return size;
1412 }
1413
1414 /**
1415  * Calculate the required bytes that are needed for the item part of the verbs
1416  * flow.
1417  *
1418  * @param[in] items
1419  *   Pointer to the list of items.
1420  *
1421  * @return
1422  *   The size of the memory needed for all items.
1423  */
1424 static int
1425 flow_verbs_get_items_size(const struct rte_flow_item items[])
1426 {
1427         int size = 0;
1428
1429         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1430                 switch (items->type) {
1431                 case RTE_FLOW_ITEM_TYPE_VOID:
1432                         break;
1433                 case RTE_FLOW_ITEM_TYPE_ETH:
1434                         size += sizeof(struct ibv_flow_spec_eth);
1435                         break;
1436                 case RTE_FLOW_ITEM_TYPE_VLAN:
1437                         size += sizeof(struct ibv_flow_spec_eth);
1438                         break;
1439                 case RTE_FLOW_ITEM_TYPE_IPV4:
1440                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1441                         break;
1442                 case RTE_FLOW_ITEM_TYPE_IPV6:
1443                         size += sizeof(struct ibv_flow_spec_ipv6);
1444                         break;
1445                 case RTE_FLOW_ITEM_TYPE_UDP:
1446                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1447                         break;
1448                 case RTE_FLOW_ITEM_TYPE_TCP:
1449                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1450                         break;
1451                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1452                         size += sizeof(struct ibv_flow_spec_tunnel);
1453                         break;
1454                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1455                         size += sizeof(struct ibv_flow_spec_tunnel);
1456                         break;
1457 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1458                 case RTE_FLOW_ITEM_TYPE_GRE:
1459                         size += sizeof(struct ibv_flow_spec_gre);
1460                         break;
1461                 case RTE_FLOW_ITEM_TYPE_MPLS:
1462                         size += sizeof(struct ibv_flow_spec_mpls);
1463                         break;
1464 #else
1465                 case RTE_FLOW_ITEM_TYPE_GRE:
1466                         size += sizeof(struct ibv_flow_spec_tunnel);
1467                         break;
1468 #endif
1469                 default:
1470                         break;
1471                 }
1472         }
1473         return size;
1474 }
1475
1476 /**
1477  * Internal preparation function. Allocate mlx5_flow with the required size.
1478  * The required size is calculate based on the actions and items. This function
1479  * also returns the detected actions and items for later use.
1480  *
1481  * @param[in] dev
1482  *   Pointer to Ethernet device.
1483  * @param[in] attr
1484  *   Pointer to the flow attributes.
1485  * @param[in] items
1486  *   Pointer to the list of items.
1487  * @param[in] actions
1488  *   Pointer to the list of actions.
1489  * @param[out] error
1490  *   Pointer to the error structure.
1491  *
1492  * @return
1493  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1494  *   is set.
1495  */
1496 static struct mlx5_flow *
1497 flow_verbs_prepare(struct rte_eth_dev *dev,
1498                    const struct rte_flow_attr *attr __rte_unused,
1499                    const struct rte_flow_item items[],
1500                    const struct rte_flow_action actions[],
1501                    struct rte_flow_error *error)
1502 {
1503         size_t size = 0;
1504         uint32_t handle_idx = 0;
1505         struct mlx5_flow *dev_flow;
1506         struct mlx5_flow_handle *dev_handle;
1507         struct mlx5_priv *priv = dev->data->dev_private;
1508
1509         size += flow_verbs_get_actions_size(actions);
1510         size += flow_verbs_get_items_size(items);
1511         if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1512                 rte_flow_error_set(error, E2BIG,
1513                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1514                                    "Verbs spec/action size too large");
1515                 return NULL;
1516         }
1517         /* In case of corrupting the memory. */
1518         if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1519                 rte_flow_error_set(error, ENOSPC,
1520                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1521                                    "not free temporary device flow");
1522                 return NULL;
1523         }
1524         dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1525                                    &handle_idx);
1526         if (!dev_handle) {
1527                 rte_flow_error_set(error, ENOMEM,
1528                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1529                                    "not enough memory to create flow handle");
1530                 return NULL;
1531         }
1532         /* No multi-thread supporting. */
1533         dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1534         dev_flow->handle = dev_handle;
1535         dev_flow->handle_idx = handle_idx;
1536         /* Memcpy is used, only size needs to be cleared to 0. */
1537         dev_flow->verbs.size = 0;
1538         dev_flow->verbs.attr.num_of_specs = 0;
1539         dev_flow->ingress = attr->ingress;
1540         /* Need to set transfer attribute: not supported in Verbs mode. */
1541         return dev_flow;
1542 }
1543
1544 /**
1545  * Fill the flow with verb spec.
1546  *
1547  * @param[in] dev
1548  *   Pointer to Ethernet device.
1549  * @param[in, out] dev_flow
1550  *   Pointer to the mlx5 flow.
1551  * @param[in] attr
1552  *   Pointer to the flow attributes.
1553  * @param[in] items
1554  *   Pointer to the list of items.
1555  * @param[in] actions
1556  *   Pointer to the list of actions.
1557  * @param[out] error
1558  *   Pointer to the error structure.
1559  *
1560  * @return
1561  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1562  */
1563 static int
1564 flow_verbs_translate(struct rte_eth_dev *dev,
1565                      struct mlx5_flow *dev_flow,
1566                      const struct rte_flow_attr *attr,
1567                      const struct rte_flow_item items[],
1568                      const struct rte_flow_action actions[],
1569                      struct rte_flow_error *error)
1570 {
1571         uint64_t item_flags = 0;
1572         uint64_t action_flags = 0;
1573         uint64_t priority = attr->priority;
1574         uint32_t subpriority = 0;
1575         struct mlx5_priv *priv = dev->data->dev_private;
1576         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1577                                               priv->rss_desc)
1578                                               [!!priv->flow_nested_idx];
1579
1580         if (priority == MLX5_FLOW_PRIO_RSVD)
1581                 priority = priv->config.flow_prio - 1;
1582         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1583                 int ret;
1584
1585                 switch (actions->type) {
1586                 case RTE_FLOW_ACTION_TYPE_VOID:
1587                         break;
1588                 case RTE_FLOW_ACTION_TYPE_FLAG:
1589                         flow_verbs_translate_action_flag(dev_flow, actions);
1590                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1591                         dev_flow->handle->mark = 1;
1592                         break;
1593                 case RTE_FLOW_ACTION_TYPE_MARK:
1594                         flow_verbs_translate_action_mark(dev_flow, actions);
1595                         action_flags |= MLX5_FLOW_ACTION_MARK;
1596                         dev_flow->handle->mark = 1;
1597                         break;
1598                 case RTE_FLOW_ACTION_TYPE_DROP:
1599                         flow_verbs_translate_action_drop(dev_flow, actions);
1600                         action_flags |= MLX5_FLOW_ACTION_DROP;
1601                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
1602                         break;
1603                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1604                         flow_verbs_translate_action_queue(rss_desc, actions);
1605                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1606                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1607                         break;
1608                 case RTE_FLOW_ACTION_TYPE_RSS:
1609                         flow_verbs_translate_action_rss(rss_desc, actions);
1610                         action_flags |= MLX5_FLOW_ACTION_RSS;
1611                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1612                         break;
1613                 case RTE_FLOW_ACTION_TYPE_COUNT:
1614                         ret = flow_verbs_translate_action_count(dev_flow,
1615                                                                 actions,
1616                                                                 dev, error);
1617                         if (ret < 0)
1618                                 return ret;
1619                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1620                         break;
1621                 default:
1622                         return rte_flow_error_set(error, ENOTSUP,
1623                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1624                                                   actions,
1625                                                   "action not supported");
1626                 }
1627         }
1628         dev_flow->act_flags = action_flags;
1629         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1630                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1631
1632                 switch (items->type) {
1633                 case RTE_FLOW_ITEM_TYPE_VOID:
1634                         break;
1635                 case RTE_FLOW_ITEM_TYPE_ETH:
1636                         flow_verbs_translate_item_eth(dev_flow, items,
1637                                                       item_flags);
1638                         subpriority = MLX5_PRIORITY_MAP_L2;
1639                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1640                                                MLX5_FLOW_LAYER_OUTER_L2;
1641                         break;
1642                 case RTE_FLOW_ITEM_TYPE_VLAN:
1643                         flow_verbs_translate_item_vlan(dev_flow, items,
1644                                                        item_flags);
1645                         subpriority = MLX5_PRIORITY_MAP_L2;
1646                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1647                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1648                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1649                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1650                         break;
1651                 case RTE_FLOW_ITEM_TYPE_IPV4:
1652                         flow_verbs_translate_item_ipv4(dev_flow, items,
1653                                                        item_flags);
1654                         subpriority = MLX5_PRIORITY_MAP_L3;
1655                         dev_flow->hash_fields |=
1656                                 mlx5_flow_hashfields_adjust
1657                                         (rss_desc, tunnel,
1658                                          MLX5_IPV4_LAYER_TYPES,
1659                                          MLX5_IPV4_IBV_RX_HASH);
1660                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1661                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1662                         break;
1663                 case RTE_FLOW_ITEM_TYPE_IPV6:
1664                         flow_verbs_translate_item_ipv6(dev_flow, items,
1665                                                        item_flags);
1666                         subpriority = MLX5_PRIORITY_MAP_L3;
1667                         dev_flow->hash_fields |=
1668                                 mlx5_flow_hashfields_adjust
1669                                         (rss_desc, tunnel,
1670                                          MLX5_IPV6_LAYER_TYPES,
1671                                          MLX5_IPV6_IBV_RX_HASH);
1672                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1673                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1674                         break;
1675                 case RTE_FLOW_ITEM_TYPE_TCP:
1676                         flow_verbs_translate_item_tcp(dev_flow, items,
1677                                                       item_flags);
1678                         subpriority = MLX5_PRIORITY_MAP_L4;
1679                         dev_flow->hash_fields |=
1680                                 mlx5_flow_hashfields_adjust
1681                                         (rss_desc, tunnel, ETH_RSS_TCP,
1682                                          (IBV_RX_HASH_SRC_PORT_TCP |
1683                                           IBV_RX_HASH_DST_PORT_TCP));
1684                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1685                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1686                         break;
1687                 case RTE_FLOW_ITEM_TYPE_UDP:
1688                         flow_verbs_translate_item_udp(dev_flow, items,
1689                                                       item_flags);
1690                         subpriority = MLX5_PRIORITY_MAP_L4;
1691                         dev_flow->hash_fields |=
1692                                 mlx5_flow_hashfields_adjust
1693                                         (rss_desc, tunnel, ETH_RSS_UDP,
1694                                          (IBV_RX_HASH_SRC_PORT_UDP |
1695                                           IBV_RX_HASH_DST_PORT_UDP));
1696                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1697                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1698                         break;
1699                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1700                         flow_verbs_translate_item_vxlan(dev_flow, items,
1701                                                         item_flags);
1702                         subpriority = MLX5_PRIORITY_MAP_L2;
1703                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1704                         break;
1705                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1706                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1707                                                             item_flags);
1708                         subpriority = MLX5_PRIORITY_MAP_L2;
1709                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1710                         break;
1711                 case RTE_FLOW_ITEM_TYPE_GRE:
1712                         flow_verbs_translate_item_gre(dev_flow, items,
1713                                                       item_flags);
1714                         subpriority = MLX5_PRIORITY_MAP_L2;
1715                         item_flags |= MLX5_FLOW_LAYER_GRE;
1716                         break;
1717                 case RTE_FLOW_ITEM_TYPE_MPLS:
1718                         flow_verbs_translate_item_mpls(dev_flow, items,
1719                                                        item_flags);
1720                         subpriority = MLX5_PRIORITY_MAP_L2;
1721                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1722                         break;
1723                 default:
1724                         return rte_flow_error_set(error, ENOTSUP,
1725                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1726                                                   NULL,
1727                                                   "item not supported");
1728                 }
1729         }
1730         dev_flow->handle->layers = item_flags;
1731         /* Other members of attr will be ignored. */
1732         dev_flow->verbs.attr.priority =
1733                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1734         dev_flow->verbs.attr.port = (uint8_t)priv->ibv_port;
1735         return 0;
1736 }
1737
1738 /**
1739  * Remove the flow from the NIC but keeps it in memory.
1740  *
1741  * @param[in] dev
1742  *   Pointer to the Ethernet device structure.
1743  * @param[in, out] flow
1744  *   Pointer to flow structure.
1745  */
1746 static void
1747 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1748 {
1749         struct mlx5_priv *priv = dev->data->dev_private;
1750         struct mlx5_flow_handle *handle;
1751         uint32_t handle_idx;
1752
1753         if (!flow)
1754                 return;
1755         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1756                        handle_idx, handle, next) {
1757                 if (handle->ib_flow) {
1758                         claim_zero(mlx5_glue->destroy_flow(handle->ib_flow));
1759                         handle->ib_flow = NULL;
1760                 }
1761                 /* hrxq is union, don't touch it only the flag is set. */
1762                 if (handle->rix_hrxq) {
1763                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1764                                 mlx5_hrxq_drop_release(dev);
1765                                 handle->rix_hrxq = 0;
1766                         } else if (handle->fate_action ==
1767                                    MLX5_FLOW_FATE_QUEUE) {
1768                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1769                                 handle->rix_hrxq = 0;
1770                         }
1771                 }
1772                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1773                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1774         }
1775 }
1776
1777 /**
1778  * Remove the flow from the NIC and the memory.
1779  *
1780  * @param[in] dev
1781  *   Pointer to the Ethernet device structure.
1782  * @param[in, out] flow
1783  *   Pointer to flow structure.
1784  */
1785 static void
1786 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1787 {
1788         struct mlx5_priv *priv = dev->data->dev_private;
1789         struct mlx5_flow_handle *handle;
1790
1791         if (!flow)
1792                 return;
1793         flow_verbs_remove(dev, flow);
1794         while (flow->dev_handles) {
1795                 uint32_t tmp_idx = flow->dev_handles;
1796
1797                 handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1798                                    tmp_idx);
1799                 if (!handle)
1800                         return;
1801                 flow->dev_handles = handle->next.next;
1802                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1803                            tmp_idx);
1804         }
1805         if (flow->counter) {
1806                 flow_verbs_counter_release(dev, flow->counter);
1807                 flow->counter = 0;
1808         }
1809 }
1810
1811 /**
1812  * Apply the flow to the NIC.
1813  *
1814  * @param[in] dev
1815  *   Pointer to the Ethernet device structure.
1816  * @param[in, out] flow
1817  *   Pointer to flow structure.
1818  * @param[out] error
1819  *   Pointer to error structure.
1820  *
1821  * @return
1822  *   0 on success, a negative errno value otherwise and rte_errno is set.
1823  */
1824 static int
1825 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1826                  struct rte_flow_error *error)
1827 {
1828         struct mlx5_priv *priv = dev->data->dev_private;
1829         struct mlx5_flow_handle *handle;
1830         struct mlx5_flow *dev_flow;
1831         struct mlx5_hrxq *hrxq;
1832         uint32_t dev_handles;
1833         int err;
1834         int idx;
1835
1836         for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
1837                 dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
1838                 handle = dev_flow->handle;
1839                 if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1840                         hrxq = mlx5_hrxq_drop_new(dev);
1841                         if (!hrxq) {
1842                                 rte_flow_error_set
1843                                         (error, errno,
1844                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1845                                          "cannot get drop hash queue");
1846                                 goto error;
1847                         }
1848                 } else {
1849                         uint32_t hrxq_idx;
1850                         struct mlx5_flow_rss_desc *rss_desc =
1851                                 &((struct mlx5_flow_rss_desc *)priv->rss_desc)
1852                                 [!!priv->flow_nested_idx];
1853
1854                         MLX5_ASSERT(rss_desc->queue_num);
1855                         hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
1856                                              MLX5_RSS_HASH_KEY_LEN,
1857                                              dev_flow->hash_fields,
1858                                              rss_desc->queue,
1859                                              rss_desc->queue_num);
1860                         if (!hrxq_idx)
1861                                 hrxq_idx = mlx5_hrxq_new(dev, rss_desc->key,
1862                                                 MLX5_RSS_HASH_KEY_LEN,
1863                                                 dev_flow->hash_fields,
1864                                                 rss_desc->queue,
1865                                                 rss_desc->queue_num,
1866                                                 !!(handle->layers &
1867                                                 MLX5_FLOW_LAYER_TUNNEL));
1868                         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1869                                          hrxq_idx);
1870                         if (!hrxq) {
1871                                 rte_flow_error_set
1872                                         (error, rte_errno,
1873                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1874                                          "cannot get hash queue");
1875                                 goto error;
1876                         }
1877                         handle->rix_hrxq = hrxq_idx;
1878                 }
1879                 MLX5_ASSERT(hrxq);
1880                 handle->ib_flow = mlx5_glue->create_flow(hrxq->qp,
1881                                                      &dev_flow->verbs.attr);
1882                 if (!handle->ib_flow) {
1883                         rte_flow_error_set(error, errno,
1884                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1885                                            NULL,
1886                                            "hardware refuses to create flow");
1887                         goto error;
1888                 }
1889                 if (priv->vmwa_context &&
1890                     handle->vf_vlan.tag && !handle->vf_vlan.created) {
1891                         /*
1892                          * The rule contains the VLAN pattern.
1893                          * For VF we are going to create VLAN
1894                          * interface to make hypervisor set correct
1895                          * e-Switch vport context.
1896                          */
1897                         mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
1898                 }
1899         }
1900         return 0;
1901 error:
1902         err = rte_errno; /* Save rte_errno before cleanup. */
1903         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1904                        dev_handles, handle, next) {
1905                 /* hrxq is union, don't touch it only the flag is set. */
1906                 if (handle->rix_hrxq) {
1907                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1908                                 mlx5_hrxq_drop_release(dev);
1909                                 handle->rix_hrxq = 0;
1910                         } else if (handle->fate_action ==
1911                                    MLX5_FLOW_FATE_QUEUE) {
1912                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1913                                 handle->rix_hrxq = 0;
1914                         }
1915                 }
1916                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1917                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1918         }
1919         rte_errno = err; /* Restore rte_errno. */
1920         return -rte_errno;
1921 }
1922
1923 /**
1924  * Query a flow.
1925  *
1926  * @see rte_flow_query()
1927  * @see rte_flow_ops
1928  */
1929 static int
1930 flow_verbs_query(struct rte_eth_dev *dev,
1931                  struct rte_flow *flow,
1932                  const struct rte_flow_action *actions,
1933                  void *data,
1934                  struct rte_flow_error *error)
1935 {
1936         int ret = -EINVAL;
1937
1938         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1939                 switch (actions->type) {
1940                 case RTE_FLOW_ACTION_TYPE_VOID:
1941                         break;
1942                 case RTE_FLOW_ACTION_TYPE_COUNT:
1943                         ret = flow_verbs_counter_query(dev, flow, data, error);
1944                         break;
1945                 default:
1946                         return rte_flow_error_set(error, ENOTSUP,
1947                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1948                                                   actions,
1949                                                   "action not supported");
1950                 }
1951         }
1952         return ret;
1953 }
1954
1955 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1956         .validate = flow_verbs_validate,
1957         .prepare = flow_verbs_prepare,
1958         .translate = flow_verbs_translate,
1959         .apply = flow_verbs_apply,
1960         .remove = flow_verbs_remove,
1961         .destroy = flow_verbs_destroy,
1962         .query = flow_verbs_query,
1963 };