4659f0a7f7c834f75403bcffa6711933fcaf993b
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include <mlx5_glue.h>
30 #include <mlx5_prm.h>
31
32 #include "mlx5_defs.h"
33 #include "mlx5.h"
34 #include "mlx5_flow.h"
35 #include "mlx5_rxtx.h"
36
/*
 * Expands to IBV_FLOW_SPEC_INNER when item_flags has any bit of
 * MLX5_FLOW_LAYER_TUNNEL set, and to 0 otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	((((item_flags) & MLX5_FLOW_LAYER_TUNNEL) != 0) ? IBV_FLOW_SPEC_INNER : 0)
39
40 /**
41  * Get Verbs flow counter by index.
42  *
43  * @param[in] dev
44  *   Pointer to the Ethernet device structure.
45  * @param[in] idx
46  *   mlx5 flow counter index in the container.
47  * @param[out] ppool
48  *   mlx5 flow counter pool in the container,
49  *
50  * @return
51  *   A pointer to the counter, NULL otherwise.
52  */
53 static struct mlx5_flow_counter *
54 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
55                               uint32_t idx,
56                               struct mlx5_flow_counter_pool **ppool)
57 {
58         struct mlx5_priv *priv = dev->data->dev_private;
59         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
60                                                                         0);
61         struct mlx5_flow_counter_pool *pool;
62
63         idx--;
64         pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
65         MLX5_ASSERT(pool);
66         if (ppool)
67                 *ppool = pool;
68         return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
69 }
70
71 /**
72  * Create Verbs flow counter with Verbs library.
73  *
74  * @param[in] dev
75  *   Pointer to the Ethernet device structure.
76  * @param[in, out] counter
77  *   mlx5 flow counter object, contains the counter id,
78  *   handle of created Verbs flow counter is returned
79  *   in cs field (if counters are supported).
80  *
81  * @return
82  *   0 On success else a negative errno value is returned
83  *   and rte_errno is set.
84  */
85 static int
86 flow_verbs_counter_create(struct rte_eth_dev *dev,
87                           struct mlx5_flow_counter_ext *counter)
88 {
89 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
90         struct mlx5_priv *priv = dev->data->dev_private;
91         struct ibv_context *ctx = priv->sh->ctx;
92         struct ibv_counter_set_init_attr init = {
93                          .counter_set_id = counter->id};
94
95         counter->cs = mlx5_glue->create_counter_set(ctx, &init);
96         if (!counter->cs) {
97                 rte_errno = ENOTSUP;
98                 return -ENOTSUP;
99         }
100         return 0;
101 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
102         struct mlx5_priv *priv = dev->data->dev_private;
103         struct ibv_context *ctx = priv->sh->ctx;
104         struct ibv_counters_init_attr init = {0};
105         struct ibv_counter_attach_attr attach;
106         int ret;
107
108         memset(&attach, 0, sizeof(attach));
109         counter->cs = mlx5_glue->create_counters(ctx, &init);
110         if (!counter->cs) {
111                 rte_errno = ENOTSUP;
112                 return -ENOTSUP;
113         }
114         attach.counter_desc = IBV_COUNTER_PACKETS;
115         attach.index = 0;
116         ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
117         if (!ret) {
118                 attach.counter_desc = IBV_COUNTER_BYTES;
119                 attach.index = 1;
120                 ret = mlx5_glue->attach_counters
121                                         (counter->cs, &attach, NULL);
122         }
123         if (ret) {
124                 claim_zero(mlx5_glue->destroy_counters(counter->cs));
125                 counter->cs = NULL;
126                 rte_errno = ret;
127                 return -ret;
128         }
129         return 0;
130 #else
131         (void)dev;
132         (void)counter;
133         rte_errno = ENOTSUP;
134         return -ENOTSUP;
135 #endif
136 }
137
138 /**
139  * Get a flow counter.
140  *
141  * @param[in] dev
142  *   Pointer to the Ethernet device structure.
143  * @param[in] shared
144  *   Indicate if this counter is shared with other flows.
145  * @param[in] id
146  *   Counter identifier.
147  *
148  * @return
149  *   Index to the counter, 0 otherwise and rte_errno is set.
150  */
151 static uint32_t
152 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
153 {
154         struct mlx5_priv *priv = dev->data->dev_private;
155         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
156                                                                         0);
157         struct mlx5_flow_counter_pool *pool = NULL;
158         struct mlx5_flow_counter_ext *cnt_ext = NULL;
159         struct mlx5_flow_counter *cnt = NULL;
160         uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
161         uint32_t pool_idx;
162         uint32_t i;
163         int ret;
164
165         if (shared) {
166                 for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
167                         pool = cont->pools[pool_idx];
168                         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
169                                 cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
170                                 if (cnt_ext->shared && cnt_ext->id == id) {
171                                         cnt_ext->ref_cnt++;
172                                         return MLX5_MAKE_CNT_IDX(pool_idx, i);
173                                 }
174                         }
175                 }
176         }
177         for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
178                 pool = cont->pools[pool_idx];
179                 if (!pool)
180                         continue;
181                 cnt = TAILQ_FIRST(&pool->counters);
182                 if (cnt)
183                         break;
184         }
185         if (!cnt) {
186                 struct mlx5_flow_counter_pool **pools;
187                 uint32_t size;
188
189                 if (n_valid == cont->n) {
190                         /* Resize the container pool array. */
191                         size = sizeof(struct mlx5_flow_counter_pool *) *
192                                      (n_valid + MLX5_CNT_CONTAINER_RESIZE);
193                         pools = rte_zmalloc(__func__, size, 0);
194                         if (!pools)
195                                 return 0;
196                         if (n_valid) {
197                                 memcpy(pools, cont->pools,
198                                        sizeof(struct mlx5_flow_counter_pool *) *
199                                        n_valid);
200                                 rte_free(cont->pools);
201                         }
202                         cont->pools = pools;
203                         cont->n += MLX5_CNT_CONTAINER_RESIZE;
204                 }
205                 /* Allocate memory for new pool*/
206                 size = sizeof(*pool) + sizeof(*cnt_ext) *
207                        MLX5_COUNTERS_PER_POOL;
208                 pool = rte_calloc(__func__, 1, size, 0);
209                 if (!pool)
210                         return 0;
211                 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
212                         cnt = MLX5_POOL_GET_CNT(pool, i);
213                         TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
214                 }
215                 cnt = MLX5_POOL_GET_CNT(pool, 0);
216                 cont->pools[n_valid] = pool;
217                 pool_idx = n_valid;
218                 rte_atomic16_add(&cont->n_valid, 1);
219                 TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
220         }
221         i = MLX5_CNT_ARRAY_IDX(pool, cnt);
222         cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
223         cnt_ext->id = id;
224         cnt_ext->shared = shared;
225         cnt_ext->ref_cnt = 1;
226         cnt->hits = 0;
227         cnt->bytes = 0;
228         /* Create counter with Verbs. */
229         ret = flow_verbs_counter_create(dev, cnt_ext);
230         if (!ret) {
231                 TAILQ_REMOVE(&pool->counters, cnt, next);
232                 return MLX5_MAKE_CNT_IDX(pool_idx, i);
233         }
234         /* Some error occurred in Verbs library. */
235         rte_errno = -ret;
236         return 0;
237 }
238
239 /**
240  * Release a flow counter.
241  *
242  * @param[in] dev
243  *   Pointer to the Ethernet device structure.
244  * @param[in] counter
245  *   Index to the counter handler.
246  */
247 static void
248 flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
249 {
250         struct mlx5_flow_counter_pool *pool;
251         struct mlx5_flow_counter *cnt;
252         struct mlx5_flow_counter_ext *cnt_ext;
253
254         cnt = flow_verbs_counter_get_by_idx(dev, counter,
255                                             &pool);
256         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
257         if (--cnt_ext->ref_cnt == 0) {
258 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
259                 claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
260                 cnt_ext->cs = NULL;
261 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
262                 claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
263                 cnt_ext->cs = NULL;
264 #endif
265                 TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
266         }
267 }
268
269 /**
270  * Query a flow counter via Verbs library call.
271  *
272  * @see rte_flow_query()
273  * @see rte_flow_ops
274  */
275 static int
276 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
277                          struct rte_flow *flow, void *data,
278                          struct rte_flow_error *error)
279 {
280 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
281         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
282         if (flow->counter) {
283                 struct mlx5_flow_counter_pool *pool;
284                 struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
285                                                 (dev, flow->counter, &pool);
286                 struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
287                                                 (pool, cnt);
288                 struct rte_flow_query_count *qc = data;
289                 uint64_t counters[2] = {0, 0};
290 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
291                 struct ibv_query_counter_set_attr query_cs_attr = {
292                         .cs = cnt_ext->cs,
293                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
294                 };
295                 struct ibv_counter_set_data query_out = {
296                         .out = counters,
297                         .outlen = 2 * sizeof(uint64_t),
298                 };
299                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
300                                                        &query_out);
301 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
302                 int err = mlx5_glue->query_counters
303                                (cnt_ext->cs, counters,
304                                 RTE_DIM(counters),
305                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
306 #endif
307                 if (err)
308                         return rte_flow_error_set
309                                 (error, err,
310                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
311                                  NULL,
312                                  "cannot read counter");
313                 qc->hits_set = 1;
314                 qc->bytes_set = 1;
315                 qc->hits = counters[0] - cnt->hits;
316                 qc->bytes = counters[1] - cnt->bytes;
317                 if (qc->reset) {
318                         cnt->hits = counters[0];
319                         cnt->bytes = counters[1];
320                 }
321                 return 0;
322         }
323         return rte_flow_error_set(error, EINVAL,
324                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
325                                   NULL,
326                                   "flow does not have counter");
327 #else
328         (void)flow;
329         (void)data;
330         return rte_flow_error_set(error, ENOTSUP,
331                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
332                                   NULL,
333                                   "counters are not available");
334 #endif
335 }
336
337 /**
338  * Add a verbs item specification into @p verbs.
339  *
340  * @param[out] verbs
341  *   Pointer to verbs structure.
342  * @param[in] src
343  *   Create specification.
344  * @param[in] size
345  *   Size in bytes of the specification to copy.
346  */
347 static void
348 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
349                     void *src, unsigned int size)
350 {
351         void *dst;
352
353         if (!verbs)
354                 return;
355         MLX5_ASSERT(verbs->specs);
356         dst = (void *)(verbs->specs + verbs->size);
357         memcpy(dst, src, size);
358         ++verbs->attr.num_of_specs;
359         verbs->size += size;
360 }
361
362 /**
363  * Convert the @p item into a Verbs specification. This function assumes that
364  * the input is valid and that there is space to insert the requested item
365  * into the flow.
366  *
367  * @param[in, out] dev_flow
368  *   Pointer to dev_flow structure.
369  * @param[in] item
370  *   Item specification.
371  * @param[in] item_flags
372  *   Parsed item flags.
373  */
374 static void
375 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
376                               const struct rte_flow_item *item,
377                               uint64_t item_flags)
378 {
379         const struct rte_flow_item_eth *spec = item->spec;
380         const struct rte_flow_item_eth *mask = item->mask;
381         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
382         struct ibv_flow_spec_eth eth = {
383                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
384                 .size = size,
385         };
386
387         if (!mask)
388                 mask = &rte_flow_item_eth_mask;
389         if (spec) {
390                 unsigned int i;
391
392                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
393                         RTE_ETHER_ADDR_LEN);
394                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
395                         RTE_ETHER_ADDR_LEN);
396                 eth.val.ether_type = spec->type;
397                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
398                         RTE_ETHER_ADDR_LEN);
399                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
400                         RTE_ETHER_ADDR_LEN);
401                 eth.mask.ether_type = mask->type;
402                 /* Remove unwanted bits from values. */
403                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
404                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
405                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
406                 }
407                 eth.val.ether_type &= eth.mask.ether_type;
408         }
409         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
410 }
411
412 /**
413  * Update the VLAN tag in the Verbs Ethernet specification.
414  * This function assumes that the input is valid and there is space to add
415  * the requested item.
416  *
417  * @param[in, out] attr
418  *   Pointer to Verbs attributes structure.
419  * @param[in] eth
420  *   Verbs structure containing the VLAN information to copy.
421  */
422 static void
423 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
424                             struct ibv_flow_spec_eth *eth)
425 {
426         unsigned int i;
427         const enum ibv_flow_spec_type search = eth->type;
428         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
429                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
430
431         for (i = 0; i != attr->num_of_specs; ++i) {
432                 if (hdr->type == search) {
433                         struct ibv_flow_spec_eth *e =
434                                 (struct ibv_flow_spec_eth *)hdr;
435
436                         e->val.vlan_tag = eth->val.vlan_tag;
437                         e->mask.vlan_tag = eth->mask.vlan_tag;
438                         e->val.ether_type = eth->val.ether_type;
439                         e->mask.ether_type = eth->mask.ether_type;
440                         break;
441                 }
442                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
443         }
444 }
445
446 /**
447  * Convert the @p item into a Verbs specification. This function assumes that
448  * the input is valid and that there is space to insert the requested item
449  * into the flow.
450  *
451  * @param[in, out] dev_flow
452  *   Pointer to dev_flow structure.
453  * @param[in] item
454  *   Item specification.
455  * @param[in] item_flags
456  *   Parsed item flags.
457  */
458 static void
459 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
460                                const struct rte_flow_item *item,
461                                uint64_t item_flags)
462 {
463         const struct rte_flow_item_vlan *spec = item->spec;
464         const struct rte_flow_item_vlan *mask = item->mask;
465         unsigned int size = sizeof(struct ibv_flow_spec_eth);
466         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
467         struct ibv_flow_spec_eth eth = {
468                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
469                 .size = size,
470         };
471         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
472                                       MLX5_FLOW_LAYER_OUTER_L2;
473
474         if (!mask)
475                 mask = &rte_flow_item_vlan_mask;
476         if (spec) {
477                 eth.val.vlan_tag = spec->tci;
478                 eth.mask.vlan_tag = mask->tci;
479                 eth.val.vlan_tag &= eth.mask.vlan_tag;
480                 eth.val.ether_type = spec->inner_type;
481                 eth.mask.ether_type = mask->inner_type;
482                 eth.val.ether_type &= eth.mask.ether_type;
483         }
484         if (!(item_flags & l2m))
485                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
486         else
487                 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
488         if (!tunnel)
489                 dev_flow->handle->vf_vlan.tag =
490                         rte_be_to_cpu_16(spec->tci) & 0x0fff;
491 }
492
493 /**
494  * Convert the @p item into a Verbs specification. This function assumes that
495  * the input is valid and that there is space to insert the requested item
496  * into the flow.
497  *
498  * @param[in, out] dev_flow
499  *   Pointer to dev_flow structure.
500  * @param[in] item
501  *   Item specification.
502  * @param[in] item_flags
503  *   Parsed item flags.
504  */
505 static void
506 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
507                                const struct rte_flow_item *item,
508                                uint64_t item_flags)
509 {
510         const struct rte_flow_item_ipv4 *spec = item->spec;
511         const struct rte_flow_item_ipv4 *mask = item->mask;
512         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
513         struct ibv_flow_spec_ipv4_ext ipv4 = {
514                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
515                 .size = size,
516         };
517
518         if (!mask)
519                 mask = &rte_flow_item_ipv4_mask;
520         if (spec) {
521                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
522                         .src_ip = spec->hdr.src_addr,
523                         .dst_ip = spec->hdr.dst_addr,
524                         .proto = spec->hdr.next_proto_id,
525                         .tos = spec->hdr.type_of_service,
526                 };
527                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
528                         .src_ip = mask->hdr.src_addr,
529                         .dst_ip = mask->hdr.dst_addr,
530                         .proto = mask->hdr.next_proto_id,
531                         .tos = mask->hdr.type_of_service,
532                 };
533                 /* Remove unwanted bits from values. */
534                 ipv4.val.src_ip &= ipv4.mask.src_ip;
535                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
536                 ipv4.val.proto &= ipv4.mask.proto;
537                 ipv4.val.tos &= ipv4.mask.tos;
538         }
539         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
540 }
541
542 /**
543  * Convert the @p item into a Verbs specification. This function assumes that
544  * the input is valid and that there is space to insert the requested item
545  * into the flow.
546  *
547  * @param[in, out] dev_flow
548  *   Pointer to dev_flow structure.
549  * @param[in] item
550  *   Item specification.
551  * @param[in] item_flags
552  *   Parsed item flags.
553  */
554 static void
555 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
556                                const struct rte_flow_item *item,
557                                uint64_t item_flags)
558 {
559         const struct rte_flow_item_ipv6 *spec = item->spec;
560         const struct rte_flow_item_ipv6 *mask = item->mask;
561         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
562         struct ibv_flow_spec_ipv6 ipv6 = {
563                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
564                 .size = size,
565         };
566
567         if (!mask)
568                 mask = &rte_flow_item_ipv6_mask;
569         if (spec) {
570                 unsigned int i;
571                 uint32_t vtc_flow_val;
572                 uint32_t vtc_flow_mask;
573
574                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
575                        RTE_DIM(ipv6.val.src_ip));
576                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
577                        RTE_DIM(ipv6.val.dst_ip));
578                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
579                        RTE_DIM(ipv6.mask.src_ip));
580                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
581                        RTE_DIM(ipv6.mask.dst_ip));
582                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
583                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
584                 ipv6.val.flow_label =
585                         rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
586                                          RTE_IPV6_HDR_FL_SHIFT);
587                 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
588                                          RTE_IPV6_HDR_TC_SHIFT;
589                 ipv6.val.next_hdr = spec->hdr.proto;
590                 ipv6.mask.flow_label =
591                         rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
592                                          RTE_IPV6_HDR_FL_SHIFT);
593                 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
594                                           RTE_IPV6_HDR_TC_SHIFT;
595                 ipv6.mask.next_hdr = mask->hdr.proto;
596                 /* Remove unwanted bits from values. */
597                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
598                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
599                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
600                 }
601                 ipv6.val.flow_label &= ipv6.mask.flow_label;
602                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
603                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
604         }
605         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
606 }
607
608 /**
609  * Convert the @p item into a Verbs specification. This function assumes that
610  * the input is valid and that there is space to insert the requested item
611  * into the flow.
612  *
613  * @param[in, out] dev_flow
614  *   Pointer to dev_flow structure.
615  * @param[in] item
616  *   Item specification.
617  * @param[in] item_flags
618  *   Parsed item flags.
619  */
620 static void
621 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
622                               const struct rte_flow_item *item,
623                               uint64_t item_flags __rte_unused)
624 {
625         const struct rte_flow_item_tcp *spec = item->spec;
626         const struct rte_flow_item_tcp *mask = item->mask;
627         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
628         struct ibv_flow_spec_tcp_udp tcp = {
629                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
630                 .size = size,
631         };
632
633         if (!mask)
634                 mask = &rte_flow_item_tcp_mask;
635         if (spec) {
636                 tcp.val.dst_port = spec->hdr.dst_port;
637                 tcp.val.src_port = spec->hdr.src_port;
638                 tcp.mask.dst_port = mask->hdr.dst_port;
639                 tcp.mask.src_port = mask->hdr.src_port;
640                 /* Remove unwanted bits from values. */
641                 tcp.val.src_port &= tcp.mask.src_port;
642                 tcp.val.dst_port &= tcp.mask.dst_port;
643         }
644         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
645 }
646
647 /**
648  * Convert the @p item into a Verbs specification. This function assumes that
649  * the input is valid and that there is space to insert the requested item
650  * into the flow.
651  *
652  * @param[in, out] dev_flow
653  *   Pointer to dev_flow structure.
654  * @param[in] item
655  *   Item specification.
656  * @param[in] item_flags
657  *   Parsed item flags.
658  */
659 static void
660 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
661                               const struct rte_flow_item *item,
662                               uint64_t item_flags __rte_unused)
663 {
664         const struct rte_flow_item_udp *spec = item->spec;
665         const struct rte_flow_item_udp *mask = item->mask;
666         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
667         struct ibv_flow_spec_tcp_udp udp = {
668                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
669                 .size = size,
670         };
671
672         if (!mask)
673                 mask = &rte_flow_item_udp_mask;
674         if (spec) {
675                 udp.val.dst_port = spec->hdr.dst_port;
676                 udp.val.src_port = spec->hdr.src_port;
677                 udp.mask.dst_port = mask->hdr.dst_port;
678                 udp.mask.src_port = mask->hdr.src_port;
679                 /* Remove unwanted bits from values. */
680                 udp.val.src_port &= udp.mask.src_port;
681                 udp.val.dst_port &= udp.mask.dst_port;
682         }
683         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
684 }
685
686 /**
687  * Convert the @p item into a Verbs specification. This function assumes that
688  * the input is valid and that there is space to insert the requested item
689  * into the flow.
690  *
691  * @param[in, out] dev_flow
692  *   Pointer to dev_flow structure.
693  * @param[in] item
694  *   Item specification.
695  * @param[in] item_flags
696  *   Parsed item flags.
697  */
698 static void
699 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
700                                 const struct rte_flow_item *item,
701                                 uint64_t item_flags __rte_unused)
702 {
703         const struct rte_flow_item_vxlan *spec = item->spec;
704         const struct rte_flow_item_vxlan *mask = item->mask;
705         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
706         struct ibv_flow_spec_tunnel vxlan = {
707                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
708                 .size = size,
709         };
710         union vni {
711                 uint32_t vlan_id;
712                 uint8_t vni[4];
713         } id = { .vlan_id = 0, };
714
715         if (!mask)
716                 mask = &rte_flow_item_vxlan_mask;
717         if (spec) {
718                 memcpy(&id.vni[1], spec->vni, 3);
719                 vxlan.val.tunnel_id = id.vlan_id;
720                 memcpy(&id.vni[1], mask->vni, 3);
721                 vxlan.mask.tunnel_id = id.vlan_id;
722                 /* Remove unwanted bits from values. */
723                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
724         }
725         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
726 }
727
728 /**
729  * Convert the @p item into a Verbs specification. This function assumes that
730  * the input is valid and that there is space to insert the requested item
731  * into the flow.
732  *
733  * @param[in, out] dev_flow
734  *   Pointer to dev_flow structure.
735  * @param[in] item
736  *   Item specification.
737  * @param[in] item_flags
738  *   Parsed item flags.
739  */
740 static void
741 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
742                                     const struct rte_flow_item *item,
743                                     uint64_t item_flags __rte_unused)
744 {
745         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
746         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
747         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
748         struct ibv_flow_spec_tunnel vxlan_gpe = {
749                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
750                 .size = size,
751         };
752         union vni {
753                 uint32_t vlan_id;
754                 uint8_t vni[4];
755         } id = { .vlan_id = 0, };
756
757         if (!mask)
758                 mask = &rte_flow_item_vxlan_gpe_mask;
759         if (spec) {
760                 memcpy(&id.vni[1], spec->vni, 3);
761                 vxlan_gpe.val.tunnel_id = id.vlan_id;
762                 memcpy(&id.vni[1], mask->vni, 3);
763                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
764                 /* Remove unwanted bits from values. */
765                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
766         }
767         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
768 }
769
770 /**
771  * Update the protocol in Verbs IPv4/IPv6 spec.
772  *
773  * @param[in, out] attr
774  *   Pointer to Verbs attributes structure.
775  * @param[in] search
776  *   Specification type to search in order to update the IP protocol.
777  * @param[in] protocol
778  *   Protocol value to set if none is present in the specification.
779  */
780 static void
781 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
782                                        enum ibv_flow_spec_type search,
783                                        uint8_t protocol)
784 {
785         unsigned int i;
786         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
787                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
788
789         if (!attr)
790                 return;
791         for (i = 0; i != attr->num_of_specs; ++i) {
792                 if (hdr->type == search) {
793                         union {
794                                 struct ibv_flow_spec_ipv4_ext *ipv4;
795                                 struct ibv_flow_spec_ipv6 *ipv6;
796                         } ip;
797
798                         switch (search) {
799                         case IBV_FLOW_SPEC_IPV4_EXT:
800                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
801                                 if (!ip.ipv4->val.proto) {
802                                         ip.ipv4->val.proto = protocol;
803                                         ip.ipv4->mask.proto = 0xff;
804                                 }
805                                 break;
806                         case IBV_FLOW_SPEC_IPV6:
807                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
808                                 if (!ip.ipv6->val.next_hdr) {
809                                         ip.ipv6->val.next_hdr = protocol;
810                                         ip.ipv6->mask.next_hdr = 0xff;
811                                 }
812                                 break;
813                         default:
814                                 break;
815                         }
816                         break;
817                 }
818                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
819         }
820 }
821
822 /**
823  * Convert the @p item into a Verbs specification. This function assumes that
824  * the input is valid and that there is space to insert the requested item
825  * into the flow.
826  *
827  * @param[in, out] dev_flow
828  *   Pointer to dev_flow structure.
829  * @param[in] item
830  *   Item specification.
831  * @param[in] item_flags
832  *   Parsed item flags.
833  */
834 static void
835 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
836                               const struct rte_flow_item *item __rte_unused,
837                               uint64_t item_flags)
838 {
839         struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
840 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
841         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
842         struct ibv_flow_spec_tunnel tunnel = {
843                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
844                 .size = size,
845         };
846 #else
847         const struct rte_flow_item_gre *spec = item->spec;
848         const struct rte_flow_item_gre *mask = item->mask;
849         unsigned int size = sizeof(struct ibv_flow_spec_gre);
850         struct ibv_flow_spec_gre tunnel = {
851                 .type = IBV_FLOW_SPEC_GRE,
852                 .size = size,
853         };
854
855         if (!mask)
856                 mask = &rte_flow_item_gre_mask;
857         if (spec) {
858                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
859                 tunnel.val.protocol = spec->protocol;
860                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
861                 tunnel.mask.protocol = mask->protocol;
862                 /* Remove unwanted bits from values. */
863                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
864                 tunnel.val.protocol &= tunnel.mask.protocol;
865                 tunnel.val.key &= tunnel.mask.key;
866         }
867 #endif
868         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
869                 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
870                                                        IBV_FLOW_SPEC_IPV4_EXT,
871                                                        IPPROTO_GRE);
872         else
873                 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
874                                                        IBV_FLOW_SPEC_IPV6,
875                                                        IPPROTO_GRE);
876         flow_verbs_spec_add(verbs, &tunnel, size);
877 }
878
879 /**
880  * Convert the @p action into a Verbs specification. This function assumes that
881  * the input is valid and that there is space to insert the requested action
882  * into the flow. This function also return the action that was added.
883  *
884  * @param[in, out] dev_flow
885  *   Pointer to dev_flow structure.
886  * @param[in] item
887  *   Item specification.
888  * @param[in] item_flags
889  *   Parsed item flags.
890  */
891 static void
892 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
893                                const struct rte_flow_item *item __rte_unused,
894                                uint64_t item_flags __rte_unused)
895 {
896 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
897         const struct rte_flow_item_mpls *spec = item->spec;
898         const struct rte_flow_item_mpls *mask = item->mask;
899         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
900         struct ibv_flow_spec_mpls mpls = {
901                 .type = IBV_FLOW_SPEC_MPLS,
902                 .size = size,
903         };
904
905         if (!mask)
906                 mask = &rte_flow_item_mpls_mask;
907         if (spec) {
908                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
909                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
910                 /* Remove unwanted bits from values.  */
911                 mpls.val.label &= mpls.mask.label;
912         }
913         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
914 #endif
915 }
916
917 /**
918  * Convert the @p action into a Verbs specification. This function assumes that
919  * the input is valid and that there is space to insert the requested action
920  * into the flow.
921  *
922  * @param[in] dev_flow
923  *   Pointer to mlx5_flow.
924  * @param[in] action
925  *   Action configuration.
926  */
927 static void
928 flow_verbs_translate_action_drop
929         (struct mlx5_flow *dev_flow,
930          const struct rte_flow_action *action __rte_unused)
931 {
932         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
933         struct ibv_flow_spec_action_drop drop = {
934                         .type = IBV_FLOW_SPEC_ACTION_DROP,
935                         .size = size,
936         };
937
938         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
939 }
940
941 /**
942  * Convert the @p action into a Verbs specification. This function assumes that
943  * the input is valid and that there is space to insert the requested action
944  * into the flow.
945  *
946  * @param[in] rss_desc
947  *   Pointer to mlx5_flow_rss_desc.
948  * @param[in] action
949  *   Action configuration.
950  */
951 static void
952 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
953                                   const struct rte_flow_action *action)
954 {
955         const struct rte_flow_action_queue *queue = action->conf;
956
957         rss_desc->queue[0] = queue->index;
958         rss_desc->queue_num = 1;
959 }
960
961 /**
962  * Convert the @p action into a Verbs specification. This function assumes that
963  * the input is valid and that there is space to insert the requested action
964  * into the flow.
965  *
966  * @param[in] rss_desc
967  *   Pointer to mlx5_flow_rss_desc.
968  * @param[in] action
969  *   Action configuration.
970  */
971 static void
972 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
973                                 const struct rte_flow_action *action)
974 {
975         const struct rte_flow_action_rss *rss = action->conf;
976         const uint8_t *rss_key;
977
978         memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
979         rss_desc->queue_num = rss->queue_num;
980         /* NULL RSS key indicates default RSS key. */
981         rss_key = !rss->key ? rss_hash_default_key : rss->key;
982         memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
983         /*
984          * rss->level and rss.types should be set in advance when expanding
985          * items for RSS.
986          */
987 }
988
989 /**
990  * Convert the @p action into a Verbs specification. This function assumes that
991  * the input is valid and that there is space to insert the requested action
992  * into the flow.
993  *
994  * @param[in] dev_flow
995  *   Pointer to mlx5_flow.
996  * @param[in] action
997  *   Action configuration.
998  */
999 static void
1000 flow_verbs_translate_action_flag
1001         (struct mlx5_flow *dev_flow,
1002          const struct rte_flow_action *action __rte_unused)
1003 {
1004         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1005         struct ibv_flow_spec_action_tag tag = {
1006                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1007                 .size = size,
1008                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1009         };
1010
1011         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1012 }
1013
1014 /**
1015  * Convert the @p action into a Verbs specification. This function assumes that
1016  * the input is valid and that there is space to insert the requested action
1017  * into the flow.
1018  *
1019  * @param[in] dev_flow
1020  *   Pointer to mlx5_flow.
1021  * @param[in] action
1022  *   Action configuration.
1023  */
1024 static void
1025 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1026                                  const struct rte_flow_action *action)
1027 {
1028         const struct rte_flow_action_mark *mark = action->conf;
1029         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1030         struct ibv_flow_spec_action_tag tag = {
1031                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1032                 .size = size,
1033                 .tag_id = mlx5_flow_mark_set(mark->id),
1034         };
1035
1036         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1037 }
1038
1039 /**
1040  * Convert the @p action into a Verbs specification. This function assumes that
1041  * the input is valid and that there is space to insert the requested action
1042  * into the flow.
1043  *
1044  * @param[in] dev
1045  *   Pointer to the Ethernet device structure.
1046  * @param[in] action
1047  *   Action configuration.
1048  * @param[in] dev_flow
1049  *   Pointer to mlx5_flow.
1050  * @param[out] error
1051  *   Pointer to error structure.
1052  *
1053  * @return
1054  *   0 On success else a negative errno value is returned and rte_errno is set.
1055  */
1056 static int
1057 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
1058                                   const struct rte_flow_action *action,
1059                                   struct rte_eth_dev *dev,
1060                                   struct rte_flow_error *error)
1061 {
1062         const struct rte_flow_action_count *count = action->conf;
1063         struct rte_flow *flow = dev_flow->flow;
1064 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1065         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1066         struct mlx5_flow_counter_pool *pool;
1067         struct mlx5_flow_counter *cnt = NULL;
1068         struct mlx5_flow_counter_ext *cnt_ext;
1069         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1070         struct ibv_flow_spec_counter_action counter = {
1071                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1072                 .size = size,
1073         };
1074 #endif
1075
1076         if (!flow->counter) {
1077                 flow->counter = flow_verbs_counter_new(dev, count->shared,
1078                                                        count->id);
1079                 if (!flow->counter)
1080                         return rte_flow_error_set(error, rte_errno,
1081                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1082                                                   action,
1083                                                   "cannot get counter"
1084                                                   " context.");
1085         }
1086 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
1087         cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1088         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1089         counter.counter_set_handle = cnt_ext->cs->handle;
1090         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1091 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1092         cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1093         cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1094         counter.counters = cnt_ext->cs;
1095         flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1096 #endif
1097         return 0;
1098 }
1099
1100 /**
1101  * Internal validation function. For validating both actions and items.
1102  *
1103  * @param[in] dev
1104  *   Pointer to the Ethernet device structure.
1105  * @param[in] attr
1106  *   Pointer to the flow attributes.
1107  * @param[in] items
1108  *   Pointer to the list of items.
1109  * @param[in] actions
1110  *   Pointer to the list of actions.
1111  * @param[in] external
1112  *   This flow rule is created by request external to PMD.
1113  * @param[in] hairpin
1114  *   Number of hairpin TX actions, 0 means classic flow.
1115  * @param[out] error
1116  *   Pointer to the error structure.
1117  *
1118  * @return
1119  *   0 on success, a negative errno value otherwise and rte_errno is set.
1120  */
1121 static int
1122 flow_verbs_validate(struct rte_eth_dev *dev,
1123                     const struct rte_flow_attr *attr,
1124                     const struct rte_flow_item items[],
1125                     const struct rte_flow_action actions[],
1126                     bool external __rte_unused,
1127                     int hairpin __rte_unused,
1128                     struct rte_flow_error *error)
1129 {
1130         int ret;
1131         uint64_t action_flags = 0;
1132         uint64_t item_flags = 0;
1133         uint64_t last_item = 0;
1134         uint8_t next_protocol = 0xff;
1135         uint16_t ether_type = 0;
1136
1137         if (items == NULL)
1138                 return -1;
1139         ret = mlx5_flow_validate_attributes(dev, attr, error);
1140         if (ret < 0)
1141                 return ret;
1142         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1143                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1144                 int ret = 0;
1145
1146                 switch (items->type) {
1147                 case RTE_FLOW_ITEM_TYPE_VOID:
1148                         break;
1149                 case RTE_FLOW_ITEM_TYPE_ETH:
1150                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1151                                                           error);
1152                         if (ret < 0)
1153                                 return ret;
1154                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1155                                              MLX5_FLOW_LAYER_OUTER_L2;
1156                         if (items->mask != NULL && items->spec != NULL) {
1157                                 ether_type =
1158                                         ((const struct rte_flow_item_eth *)
1159                                          items->spec)->type;
1160                                 ether_type &=
1161                                         ((const struct rte_flow_item_eth *)
1162                                          items->mask)->type;
1163                                 ether_type = rte_be_to_cpu_16(ether_type);
1164                         } else {
1165                                 ether_type = 0;
1166                         }
1167                         break;
1168                 case RTE_FLOW_ITEM_TYPE_VLAN:
1169                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1170                                                            dev, error);
1171                         if (ret < 0)
1172                                 return ret;
1173                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1174                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1175                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1176                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1177                         if (items->mask != NULL && items->spec != NULL) {
1178                                 ether_type =
1179                                         ((const struct rte_flow_item_vlan *)
1180                                          items->spec)->inner_type;
1181                                 ether_type &=
1182                                         ((const struct rte_flow_item_vlan *)
1183                                          items->mask)->inner_type;
1184                                 ether_type = rte_be_to_cpu_16(ether_type);
1185                         } else {
1186                                 ether_type = 0;
1187                         }
1188                         break;
1189                 case RTE_FLOW_ITEM_TYPE_IPV4:
1190                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1191                                                            last_item,
1192                                                            ether_type, NULL,
1193                                                            error);
1194                         if (ret < 0)
1195                                 return ret;
1196                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1197                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1198                         if (items->mask != NULL &&
1199                             ((const struct rte_flow_item_ipv4 *)
1200                              items->mask)->hdr.next_proto_id) {
1201                                 next_protocol =
1202                                         ((const struct rte_flow_item_ipv4 *)
1203                                          (items->spec))->hdr.next_proto_id;
1204                                 next_protocol &=
1205                                         ((const struct rte_flow_item_ipv4 *)
1206                                          (items->mask))->hdr.next_proto_id;
1207                         } else {
1208                                 /* Reset for inner layer. */
1209                                 next_protocol = 0xff;
1210                         }
1211                         break;
1212                 case RTE_FLOW_ITEM_TYPE_IPV6:
1213                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1214                                                            last_item,
1215                                                            ether_type, NULL,
1216                                                            error);
1217                         if (ret < 0)
1218                                 return ret;
1219                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1220                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1221                         if (items->mask != NULL &&
1222                             ((const struct rte_flow_item_ipv6 *)
1223                              items->mask)->hdr.proto) {
1224                                 next_protocol =
1225                                         ((const struct rte_flow_item_ipv6 *)
1226                                          items->spec)->hdr.proto;
1227                                 next_protocol &=
1228                                         ((const struct rte_flow_item_ipv6 *)
1229                                          items->mask)->hdr.proto;
1230                         } else {
1231                                 /* Reset for inner layer. */
1232                                 next_protocol = 0xff;
1233                         }
1234                         break;
1235                 case RTE_FLOW_ITEM_TYPE_UDP:
1236                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1237                                                           next_protocol,
1238                                                           error);
1239                         if (ret < 0)
1240                                 return ret;
1241                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1242                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1243                         break;
1244                 case RTE_FLOW_ITEM_TYPE_TCP:
1245                         ret = mlx5_flow_validate_item_tcp
1246                                                 (items, item_flags,
1247                                                  next_protocol,
1248                                                  &rte_flow_item_tcp_mask,
1249                                                  error);
1250                         if (ret < 0)
1251                                 return ret;
1252                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1253                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1254                         break;
1255                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1256                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1257                                                             error);
1258                         if (ret < 0)
1259                                 return ret;
1260                         last_item = MLX5_FLOW_LAYER_VXLAN;
1261                         break;
1262                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1263                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1264                                                                 item_flags,
1265                                                                 dev, error);
1266                         if (ret < 0)
1267                                 return ret;
1268                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1269                         break;
1270                 case RTE_FLOW_ITEM_TYPE_GRE:
1271                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1272                                                           next_protocol, error);
1273                         if (ret < 0)
1274                                 return ret;
1275                         last_item = MLX5_FLOW_LAYER_GRE;
1276                         break;
1277                 case RTE_FLOW_ITEM_TYPE_MPLS:
1278                         ret = mlx5_flow_validate_item_mpls(dev, items,
1279                                                            item_flags,
1280                                                            last_item, error);
1281                         if (ret < 0)
1282                                 return ret;
1283                         last_item = MLX5_FLOW_LAYER_MPLS;
1284                         break;
1285                 default:
1286                         return rte_flow_error_set(error, ENOTSUP,
1287                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1288                                                   NULL, "item not supported");
1289                 }
1290                 item_flags |= last_item;
1291         }
1292         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1293                 switch (actions->type) {
1294                 case RTE_FLOW_ACTION_TYPE_VOID:
1295                         break;
1296                 case RTE_FLOW_ACTION_TYPE_FLAG:
1297                         ret = mlx5_flow_validate_action_flag(action_flags,
1298                                                              attr,
1299                                                              error);
1300                         if (ret < 0)
1301                                 return ret;
1302                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1303                         break;
1304                 case RTE_FLOW_ACTION_TYPE_MARK:
1305                         ret = mlx5_flow_validate_action_mark(actions,
1306                                                              action_flags,
1307                                                              attr,
1308                                                              error);
1309                         if (ret < 0)
1310                                 return ret;
1311                         action_flags |= MLX5_FLOW_ACTION_MARK;
1312                         break;
1313                 case RTE_FLOW_ACTION_TYPE_DROP:
1314                         ret = mlx5_flow_validate_action_drop(action_flags,
1315                                                              attr,
1316                                                              error);
1317                         if (ret < 0)
1318                                 return ret;
1319                         action_flags |= MLX5_FLOW_ACTION_DROP;
1320                         break;
1321                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1322                         ret = mlx5_flow_validate_action_queue(actions,
1323                                                               action_flags, dev,
1324                                                               attr,
1325                                                               error);
1326                         if (ret < 0)
1327                                 return ret;
1328                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1329                         break;
1330                 case RTE_FLOW_ACTION_TYPE_RSS:
1331                         ret = mlx5_flow_validate_action_rss(actions,
1332                                                             action_flags, dev,
1333                                                             attr, item_flags,
1334                                                             error);
1335                         if (ret < 0)
1336                                 return ret;
1337                         action_flags |= MLX5_FLOW_ACTION_RSS;
1338                         break;
1339                 case RTE_FLOW_ACTION_TYPE_COUNT:
1340                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1341                         if (ret < 0)
1342                                 return ret;
1343                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1344                         break;
1345                 default:
1346                         return rte_flow_error_set(error, ENOTSUP,
1347                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1348                                                   actions,
1349                                                   "action not supported");
1350                 }
1351         }
1352         /*
1353          * Validate the drop action mutual exclusion with other actions.
1354          * Drop action is mutually-exclusive with any other action, except for
1355          * Count action.
1356          */
1357         if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1358             (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1359                 return rte_flow_error_set(error, EINVAL,
1360                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1361                                           "Drop action is mutually-exclusive "
1362                                           "with any other action, except for "
1363                                           "Count action");
1364         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1365                 return rte_flow_error_set(error, EINVAL,
1366                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1367                                           "no fate action is found");
1368         return 0;
1369 }
1370
1371 /**
1372  * Calculate the required bytes that are needed for the action part of the verbs
1373  * flow.
1374  *
1375  * @param[in] actions
1376  *   Pointer to the list of actions.
1377  *
1378  * @return
1379  *   The size of the memory needed for all actions.
1380  */
1381 static int
1382 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1383 {
1384         int size = 0;
1385
1386         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1387                 switch (actions->type) {
1388                 case RTE_FLOW_ACTION_TYPE_VOID:
1389                         break;
1390                 case RTE_FLOW_ACTION_TYPE_FLAG:
1391                         size += sizeof(struct ibv_flow_spec_action_tag);
1392                         break;
1393                 case RTE_FLOW_ACTION_TYPE_MARK:
1394                         size += sizeof(struct ibv_flow_spec_action_tag);
1395                         break;
1396                 case RTE_FLOW_ACTION_TYPE_DROP:
1397                         size += sizeof(struct ibv_flow_spec_action_drop);
1398                         break;
1399                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1400                         break;
1401                 case RTE_FLOW_ACTION_TYPE_RSS:
1402                         break;
1403                 case RTE_FLOW_ACTION_TYPE_COUNT:
1404 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1405         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1406                         size += sizeof(struct ibv_flow_spec_counter_action);
1407 #endif
1408                         break;
1409                 default:
1410                         break;
1411                 }
1412         }
1413         return size;
1414 }
1415
1416 /**
1417  * Calculate the required bytes that are needed for the item part of the verbs
1418  * flow.
1419  *
1420  * @param[in] items
1421  *   Pointer to the list of items.
1422  *
1423  * @return
1424  *   The size of the memory needed for all items.
1425  */
1426 static int
1427 flow_verbs_get_items_size(const struct rte_flow_item items[])
1428 {
1429         int size = 0;
1430
1431         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1432                 switch (items->type) {
1433                 case RTE_FLOW_ITEM_TYPE_VOID:
1434                         break;
1435                 case RTE_FLOW_ITEM_TYPE_ETH:
1436                         size += sizeof(struct ibv_flow_spec_eth);
1437                         break;
1438                 case RTE_FLOW_ITEM_TYPE_VLAN:
1439                         size += sizeof(struct ibv_flow_spec_eth);
1440                         break;
1441                 case RTE_FLOW_ITEM_TYPE_IPV4:
1442                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1443                         break;
1444                 case RTE_FLOW_ITEM_TYPE_IPV6:
1445                         size += sizeof(struct ibv_flow_spec_ipv6);
1446                         break;
1447                 case RTE_FLOW_ITEM_TYPE_UDP:
1448                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1449                         break;
1450                 case RTE_FLOW_ITEM_TYPE_TCP:
1451                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1452                         break;
1453                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1454                         size += sizeof(struct ibv_flow_spec_tunnel);
1455                         break;
1456                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1457                         size += sizeof(struct ibv_flow_spec_tunnel);
1458                         break;
1459 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1460                 case RTE_FLOW_ITEM_TYPE_GRE:
1461                         size += sizeof(struct ibv_flow_spec_gre);
1462                         break;
1463                 case RTE_FLOW_ITEM_TYPE_MPLS:
1464                         size += sizeof(struct ibv_flow_spec_mpls);
1465                         break;
1466 #else
1467                 case RTE_FLOW_ITEM_TYPE_GRE:
1468                         size += sizeof(struct ibv_flow_spec_tunnel);
1469                         break;
1470 #endif
1471                 default:
1472                         break;
1473                 }
1474         }
1475         return size;
1476 }
1477
1478 /**
1479  * Internal preparation function. Allocate mlx5_flow with the required size.
1480  * The required size is calculate based on the actions and items. This function
1481  * also returns the detected actions and items for later use.
1482  *
1483  * @param[in] dev
1484  *   Pointer to Ethernet device.
1485  * @param[in] attr
1486  *   Pointer to the flow attributes.
1487  * @param[in] items
1488  *   Pointer to the list of items.
1489  * @param[in] actions
1490  *   Pointer to the list of actions.
1491  * @param[out] error
1492  *   Pointer to the error structure.
1493  *
1494  * @return
1495  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1496  *   is set.
1497  */
1498 static struct mlx5_flow *
1499 flow_verbs_prepare(struct rte_eth_dev *dev,
1500                    const struct rte_flow_attr *attr __rte_unused,
1501                    const struct rte_flow_item items[],
1502                    const struct rte_flow_action actions[],
1503                    struct rte_flow_error *error)
1504 {
1505         size_t size = 0;
1506         uint32_t handle_idx = 0;
1507         struct mlx5_flow *dev_flow;
1508         struct mlx5_flow_handle *dev_handle;
1509         struct mlx5_priv *priv = dev->data->dev_private;
1510
1511         size += flow_verbs_get_actions_size(actions);
1512         size += flow_verbs_get_items_size(items);
1513         if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1514                 rte_flow_error_set(error, E2BIG,
1515                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1516                                    "Verbs spec/action size too large");
1517                 return NULL;
1518         }
1519         /* In case of corrupting the memory. */
1520         if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1521                 rte_flow_error_set(error, ENOSPC,
1522                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1523                                    "not free temporary device flow");
1524                 return NULL;
1525         }
1526         dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1527                                    &handle_idx);
1528         if (!dev_handle) {
1529                 rte_flow_error_set(error, ENOMEM,
1530                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1531                                    "not enough memory to create flow handle");
1532                 return NULL;
1533         }
1534         /* No multi-thread supporting. */
1535         dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1536         dev_flow->handle = dev_handle;
1537         dev_flow->handle_idx = handle_idx;
1538         /* Memcpy is used, only size needs to be cleared to 0. */
1539         dev_flow->verbs.size = 0;
1540         dev_flow->verbs.attr.num_of_specs = 0;
1541         dev_flow->ingress = attr->ingress;
1542         dev_flow->hash_fields = 0;
1543         /* Need to set transfer attribute: not supported in Verbs mode. */
1544         return dev_flow;
1545 }
1546
1547 /**
1548  * Fill the flow with verb spec.
1549  *
1550  * @param[in] dev
1551  *   Pointer to Ethernet device.
1552  * @param[in, out] dev_flow
1553  *   Pointer to the mlx5 flow.
1554  * @param[in] attr
1555  *   Pointer to the flow attributes.
1556  * @param[in] items
1557  *   Pointer to the list of items.
1558  * @param[in] actions
1559  *   Pointer to the list of actions.
1560  * @param[out] error
1561  *   Pointer to the error structure.
1562  *
1563  * @return
1564  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1565  */
1566 static int
1567 flow_verbs_translate(struct rte_eth_dev *dev,
1568                      struct mlx5_flow *dev_flow,
1569                      const struct rte_flow_attr *attr,
1570                      const struct rte_flow_item items[],
1571                      const struct rte_flow_action actions[],
1572                      struct rte_flow_error *error)
1573 {
1574         uint64_t item_flags = 0;
1575         uint64_t action_flags = 0;
1576         uint64_t priority = attr->priority;
1577         uint32_t subpriority = 0;
1578         struct mlx5_priv *priv = dev->data->dev_private;
1579         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1580                                               priv->rss_desc)
1581                                               [!!priv->flow_nested_idx];
1582
1583         if (priority == MLX5_FLOW_PRIO_RSVD)
1584                 priority = priv->config.flow_prio - 1;
1585         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1586                 int ret;
1587
1588                 switch (actions->type) {
1589                 case RTE_FLOW_ACTION_TYPE_VOID:
1590                         break;
1591                 case RTE_FLOW_ACTION_TYPE_FLAG:
1592                         flow_verbs_translate_action_flag(dev_flow, actions);
1593                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1594                         dev_flow->handle->mark = 1;
1595                         break;
1596                 case RTE_FLOW_ACTION_TYPE_MARK:
1597                         flow_verbs_translate_action_mark(dev_flow, actions);
1598                         action_flags |= MLX5_FLOW_ACTION_MARK;
1599                         dev_flow->handle->mark = 1;
1600                         break;
1601                 case RTE_FLOW_ACTION_TYPE_DROP:
1602                         flow_verbs_translate_action_drop(dev_flow, actions);
1603                         action_flags |= MLX5_FLOW_ACTION_DROP;
1604                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
1605                         break;
1606                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1607                         flow_verbs_translate_action_queue(rss_desc, actions);
1608                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1609                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1610                         break;
1611                 case RTE_FLOW_ACTION_TYPE_RSS:
1612                         flow_verbs_translate_action_rss(rss_desc, actions);
1613                         action_flags |= MLX5_FLOW_ACTION_RSS;
1614                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1615                         break;
1616                 case RTE_FLOW_ACTION_TYPE_COUNT:
1617                         ret = flow_verbs_translate_action_count(dev_flow,
1618                                                                 actions,
1619                                                                 dev, error);
1620                         if (ret < 0)
1621                                 return ret;
1622                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1623                         break;
1624                 default:
1625                         return rte_flow_error_set(error, ENOTSUP,
1626                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1627                                                   actions,
1628                                                   "action not supported");
1629                 }
1630         }
1631         dev_flow->act_flags = action_flags;
1632         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1633                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1634
1635                 switch (items->type) {
1636                 case RTE_FLOW_ITEM_TYPE_VOID:
1637                         break;
1638                 case RTE_FLOW_ITEM_TYPE_ETH:
1639                         flow_verbs_translate_item_eth(dev_flow, items,
1640                                                       item_flags);
1641                         subpriority = MLX5_PRIORITY_MAP_L2;
1642                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1643                                                MLX5_FLOW_LAYER_OUTER_L2;
1644                         break;
1645                 case RTE_FLOW_ITEM_TYPE_VLAN:
1646                         flow_verbs_translate_item_vlan(dev_flow, items,
1647                                                        item_flags);
1648                         subpriority = MLX5_PRIORITY_MAP_L2;
1649                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1650                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1651                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1652                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1653                         break;
1654                 case RTE_FLOW_ITEM_TYPE_IPV4:
1655                         flow_verbs_translate_item_ipv4(dev_flow, items,
1656                                                        item_flags);
1657                         subpriority = MLX5_PRIORITY_MAP_L3;
1658                         dev_flow->hash_fields |=
1659                                 mlx5_flow_hashfields_adjust
1660                                         (rss_desc, tunnel,
1661                                          MLX5_IPV4_LAYER_TYPES,
1662                                          MLX5_IPV4_IBV_RX_HASH);
1663                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1664                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1665                         break;
1666                 case RTE_FLOW_ITEM_TYPE_IPV6:
1667                         flow_verbs_translate_item_ipv6(dev_flow, items,
1668                                                        item_flags);
1669                         subpriority = MLX5_PRIORITY_MAP_L3;
1670                         dev_flow->hash_fields |=
1671                                 mlx5_flow_hashfields_adjust
1672                                         (rss_desc, tunnel,
1673                                          MLX5_IPV6_LAYER_TYPES,
1674                                          MLX5_IPV6_IBV_RX_HASH);
1675                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1676                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1677                         break;
1678                 case RTE_FLOW_ITEM_TYPE_TCP:
1679                         flow_verbs_translate_item_tcp(dev_flow, items,
1680                                                       item_flags);
1681                         subpriority = MLX5_PRIORITY_MAP_L4;
1682                         dev_flow->hash_fields |=
1683                                 mlx5_flow_hashfields_adjust
1684                                         (rss_desc, tunnel, ETH_RSS_TCP,
1685                                          (IBV_RX_HASH_SRC_PORT_TCP |
1686                                           IBV_RX_HASH_DST_PORT_TCP));
1687                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1688                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1689                         break;
1690                 case RTE_FLOW_ITEM_TYPE_UDP:
1691                         flow_verbs_translate_item_udp(dev_flow, items,
1692                                                       item_flags);
1693                         subpriority = MLX5_PRIORITY_MAP_L4;
1694                         dev_flow->hash_fields |=
1695                                 mlx5_flow_hashfields_adjust
1696                                         (rss_desc, tunnel, ETH_RSS_UDP,
1697                                          (IBV_RX_HASH_SRC_PORT_UDP |
1698                                           IBV_RX_HASH_DST_PORT_UDP));
1699                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1700                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1701                         break;
1702                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1703                         flow_verbs_translate_item_vxlan(dev_flow, items,
1704                                                         item_flags);
1705                         subpriority = MLX5_PRIORITY_MAP_L2;
1706                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1707                         break;
1708                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1709                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1710                                                             item_flags);
1711                         subpriority = MLX5_PRIORITY_MAP_L2;
1712                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1713                         break;
1714                 case RTE_FLOW_ITEM_TYPE_GRE:
1715                         flow_verbs_translate_item_gre(dev_flow, items,
1716                                                       item_flags);
1717                         subpriority = MLX5_PRIORITY_MAP_L2;
1718                         item_flags |= MLX5_FLOW_LAYER_GRE;
1719                         break;
1720                 case RTE_FLOW_ITEM_TYPE_MPLS:
1721                         flow_verbs_translate_item_mpls(dev_flow, items,
1722                                                        item_flags);
1723                         subpriority = MLX5_PRIORITY_MAP_L2;
1724                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1725                         break;
1726                 default:
1727                         return rte_flow_error_set(error, ENOTSUP,
1728                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1729                                                   NULL,
1730                                                   "item not supported");
1731                 }
1732         }
1733         dev_flow->handle->layers = item_flags;
1734         /* Other members of attr will be ignored. */
1735         dev_flow->verbs.attr.priority =
1736                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1737         dev_flow->verbs.attr.port = (uint8_t)priv->ibv_port;
1738         return 0;
1739 }
1740
1741 /**
1742  * Remove the flow from the NIC but keeps it in memory.
1743  *
1744  * @param[in] dev
1745  *   Pointer to the Ethernet device structure.
1746  * @param[in, out] flow
1747  *   Pointer to flow structure.
1748  */
1749 static void
1750 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1751 {
1752         struct mlx5_priv *priv = dev->data->dev_private;
1753         struct mlx5_flow_handle *handle;
1754         uint32_t handle_idx;
1755
1756         if (!flow)
1757                 return;
1758         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1759                        handle_idx, handle, next) {
1760                 if (handle->ib_flow) {
1761                         claim_zero(mlx5_glue->destroy_flow(handle->ib_flow));
1762                         handle->ib_flow = NULL;
1763                 }
1764                 /* hrxq is union, don't touch it only the flag is set. */
1765                 if (handle->rix_hrxq) {
1766                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1767                                 mlx5_hrxq_drop_release(dev);
1768                                 handle->rix_hrxq = 0;
1769                         } else if (handle->fate_action ==
1770                                    MLX5_FLOW_FATE_QUEUE) {
1771                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1772                                 handle->rix_hrxq = 0;
1773                         }
1774                 }
1775                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1776                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1777         }
1778 }
1779
1780 /**
1781  * Remove the flow from the NIC and the memory.
1782  *
1783  * @param[in] dev
1784  *   Pointer to the Ethernet device structure.
1785  * @param[in, out] flow
1786  *   Pointer to flow structure.
1787  */
1788 static void
1789 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1790 {
1791         struct mlx5_priv *priv = dev->data->dev_private;
1792         struct mlx5_flow_handle *handle;
1793
1794         if (!flow)
1795                 return;
1796         flow_verbs_remove(dev, flow);
1797         while (flow->dev_handles) {
1798                 uint32_t tmp_idx = flow->dev_handles;
1799
1800                 handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1801                                    tmp_idx);
1802                 if (!handle)
1803                         return;
1804                 flow->dev_handles = handle->next.next;
1805                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1806                            tmp_idx);
1807         }
1808         if (flow->counter) {
1809                 flow_verbs_counter_release(dev, flow->counter);
1810                 flow->counter = 0;
1811         }
1812 }
1813
1814 /**
1815  * Apply the flow to the NIC.
1816  *
1817  * @param[in] dev
1818  *   Pointer to the Ethernet device structure.
1819  * @param[in, out] flow
1820  *   Pointer to flow structure.
1821  * @param[out] error
1822  *   Pointer to error structure.
1823  *
1824  * @return
1825  *   0 on success, a negative errno value otherwise and rte_errno is set.
1826  */
1827 static int
1828 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1829                  struct rte_flow_error *error)
1830 {
1831         struct mlx5_priv *priv = dev->data->dev_private;
1832         struct mlx5_flow_handle *handle;
1833         struct mlx5_flow *dev_flow;
1834         struct mlx5_hrxq *hrxq;
1835         uint32_t dev_handles;
1836         int err;
1837         int idx;
1838
1839         for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
1840                 dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
1841                 handle = dev_flow->handle;
1842                 if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1843                         hrxq = mlx5_hrxq_drop_new(dev);
1844                         if (!hrxq) {
1845                                 rte_flow_error_set
1846                                         (error, errno,
1847                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1848                                          "cannot get drop hash queue");
1849                                 goto error;
1850                         }
1851                 } else {
1852                         uint32_t hrxq_idx;
1853                         struct mlx5_flow_rss_desc *rss_desc =
1854                                 &((struct mlx5_flow_rss_desc *)priv->rss_desc)
1855                                 [!!priv->flow_nested_idx];
1856
1857                         MLX5_ASSERT(rss_desc->queue_num);
1858                         hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
1859                                              MLX5_RSS_HASH_KEY_LEN,
1860                                              dev_flow->hash_fields,
1861                                              rss_desc->queue,
1862                                              rss_desc->queue_num);
1863                         if (!hrxq_idx)
1864                                 hrxq_idx = mlx5_hrxq_new(dev, rss_desc->key,
1865                                                 MLX5_RSS_HASH_KEY_LEN,
1866                                                 dev_flow->hash_fields,
1867                                                 rss_desc->queue,
1868                                                 rss_desc->queue_num,
1869                                                 !!(handle->layers &
1870                                                 MLX5_FLOW_LAYER_TUNNEL));
1871                         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1872                                          hrxq_idx);
1873                         if (!hrxq) {
1874                                 rte_flow_error_set
1875                                         (error, rte_errno,
1876                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1877                                          "cannot get hash queue");
1878                                 goto error;
1879                         }
1880                         handle->rix_hrxq = hrxq_idx;
1881                 }
1882                 MLX5_ASSERT(hrxq);
1883                 handle->ib_flow = mlx5_glue->create_flow(hrxq->qp,
1884                                                      &dev_flow->verbs.attr);
1885                 if (!handle->ib_flow) {
1886                         rte_flow_error_set(error, errno,
1887                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1888                                            NULL,
1889                                            "hardware refuses to create flow");
1890                         goto error;
1891                 }
1892                 if (priv->vmwa_context &&
1893                     handle->vf_vlan.tag && !handle->vf_vlan.created) {
1894                         /*
1895                          * The rule contains the VLAN pattern.
1896                          * For VF we are going to create VLAN
1897                          * interface to make hypervisor set correct
1898                          * e-Switch vport context.
1899                          */
1900                         mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
1901                 }
1902         }
1903         return 0;
1904 error:
1905         err = rte_errno; /* Save rte_errno before cleanup. */
1906         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1907                        dev_handles, handle, next) {
1908                 /* hrxq is union, don't touch it only the flag is set. */
1909                 if (handle->rix_hrxq) {
1910                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1911                                 mlx5_hrxq_drop_release(dev);
1912                                 handle->rix_hrxq = 0;
1913                         } else if (handle->fate_action ==
1914                                    MLX5_FLOW_FATE_QUEUE) {
1915                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1916                                 handle->rix_hrxq = 0;
1917                         }
1918                 }
1919                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1920                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1921         }
1922         rte_errno = err; /* Restore rte_errno. */
1923         return -rte_errno;
1924 }
1925
1926 /**
1927  * Query a flow.
1928  *
1929  * @see rte_flow_query()
1930  * @see rte_flow_ops
1931  */
1932 static int
1933 flow_verbs_query(struct rte_eth_dev *dev,
1934                  struct rte_flow *flow,
1935                  const struct rte_flow_action *actions,
1936                  void *data,
1937                  struct rte_flow_error *error)
1938 {
1939         int ret = -EINVAL;
1940
1941         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1942                 switch (actions->type) {
1943                 case RTE_FLOW_ACTION_TYPE_VOID:
1944                         break;
1945                 case RTE_FLOW_ACTION_TYPE_COUNT:
1946                         ret = flow_verbs_counter_query(dev, flow, data, error);
1947                         break;
1948                 default:
1949                         return rte_flow_error_set(error, ENOTSUP,
1950                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1951                                                   actions,
1952                                                   "action not supported");
1953                 }
1954         }
1955         return ret;
1956 }
1957
1958 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
1959         .validate = flow_verbs_validate,
1960         .prepare = flow_verbs_prepare,
1961         .translate = flow_verbs_translate,
1962         .apply = flow_verbs_apply,
1963         .remove = flow_verbs_remove,
1964         .destroy = flow_verbs_destroy,
1965         .query = flow_verbs_query,
1966 };