0bb17b5e01a18ae3b5a89e7a7bc487064d4795fc
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 #include <rte_common.h>
12 #include <rte_ether.h>
13 #include <rte_ethdev_driver.h>
14 #include <rte_flow.h>
15 #include <rte_flow_driver.h>
16 #include <rte_malloc.h>
17 #include <rte_ip.h>
18
19 #include <mlx5_glue.h>
20 #include <mlx5_prm.h>
21 #include <mlx5_malloc.h>
22
23 #include "mlx5_defs.h"
24 #include "mlx5.h"
25 #include "mlx5_flow.h"
26 #include "mlx5_rxtx.h"
27
/*
 * Tag a Verbs spec type as "inner" when the item being translated lies
 * inside a tunnel payload; for outer headers the spec type is used as-is.
 */
#define VERBS_SPEC_INNER(item_flags) \
	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)

/*
 * Map of Verbs to Flow priority with 8 Verbs priorities.
 * Rows are indexed by the rule base priority, columns by the item-derived
 * sub-priority (MLX5_PRIORITY_MAP_MAX entries per row).
 */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. Same layout. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};
41
42 /**
43  * Discover the maximum number of priority available.
44  *
45  * @param[in] dev
46  *   Pointer to the Ethernet device structure.
47  *
48  * @return
49  *   number of supported flow priority on success, a negative errno
50  *   value otherwise and rte_errno is set.
51  */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	/* Minimal probe flow: an empty Ethernet match plus a drop action. */
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
			.port = (uint8_t)priv->dev_port,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	/* Drop queue is needed only as an anchor QP for the probe flows. */
	struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/*
	 * Try creating a flow at the highest priority of each candidate
	 * range; the largest range whose top priority is accepted wins.
	 */
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	mlx5_drop_action_destroy(dev);
	/* Translate the Verbs priority count into a priority-map size. */
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, priority);
		return -rte_errno;
	}
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}
111
112 /**
113  * Adjust flow priority based on the highest layer and the request priority.
114  *
115  * @param[in] dev
116  *   Pointer to the Ethernet device structure.
117  * @param[in] priority
118  *   The rule base priority.
119  * @param[in] subpriority
120  *   The priority based on the items.
121  *
122  * @return
123  *   The new priority.
124  */
125 uint32_t
126 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
127                                    uint32_t subpriority)
128 {
129         uint32_t res = 0;
130         struct mlx5_priv *priv = dev->data->dev_private;
131
132         switch (priv->config.flow_prio) {
133         case RTE_DIM(priority_map_3):
134                 res = priority_map_3[priority][subpriority];
135                 break;
136         case RTE_DIM(priority_map_5):
137                 res = priority_map_5[priority][subpriority];
138                 break;
139         }
140         return  res;
141 }
142
143 /**
144  * Get Verbs flow counter by index.
145  *
146  * @param[in] dev
147  *   Pointer to the Ethernet device structure.
148  * @param[in] idx
149  *   mlx5 flow counter index in the container.
150  * @param[out] ppool
151  *   mlx5 flow counter pool in the container,
152  *
153  * @return
154  *   A pointer to the counter, NULL otherwise.
155  */
156 static struct mlx5_flow_counter *
157 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
158                               uint32_t idx,
159                               struct mlx5_flow_counter_pool **ppool)
160 {
161         struct mlx5_priv *priv = dev->data->dev_private;
162         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
163         struct mlx5_flow_counter_pool *pool;
164
165         idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
166         pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
167         MLX5_ASSERT(pool);
168         if (ppool)
169                 *ppool = pool;
170         return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
171 }
172
173 /**
174  * Create Verbs flow counter with Verbs library.
175  *
176  * @param[in] dev
177  *   Pointer to the Ethernet device structure.
178  * @param[in, out] counter
179  *   mlx5 flow counter object, contains the counter id,
180  *   handle of created Verbs flow counter is returned
181  *   in cs field (if counters are supported).
182  *
183  * @return
184  *   0 On success else a negative errno value is returned
185  *   and rte_errno is set.
186  */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter_ext *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* Legacy counter-set API: a single set identified by counter->id. */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->id};

	counter->cs = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/*
	 * Modern counters API: create a counters object, then attach a
	 * packets counter at index 0 and a bytes counter at index 1
	 * (query results are read back in that order).
	 */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->cs = mlx5_glue->create_counters(ctx, &init);
	if (!counter->cs) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->cs, &attach, NULL);
	}
	if (ret) {
		/* Roll back: never leave a half-attached counters object. */
		claim_zero(mlx5_glue->destroy_counters(counter->cs));
		counter->cs = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	/* No counter support compiled in. */
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
239
240 /**
241  * Get a flow counter.
242  *
243  * @param[in] dev
244  *   Pointer to the Ethernet device structure.
245  * @param[in] shared
246  *   Indicate if this counter is shared with other flows.
247  * @param[in] id
248  *   Counter identifier.
249  *
250  * @return
251  *   Index to the counter, 0 otherwise and rte_errno is set.
252  */
static uint32_t
flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
	struct mlx5_flow_counter_pool *pool = NULL;
	struct mlx5_flow_counter_ext *cnt_ext = NULL;
	struct mlx5_flow_counter *cnt = NULL;
	union mlx5_l3t_data data;
	uint32_t n_valid = cmng->n_valid;
	uint32_t pool_idx, cnt_idx;
	uint32_t i;
	int ret;

	/* Shared counter with this id already registered: reuse its index. */
	if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) &&
	    data.dword)
		return data.dword;
	/* Find the first pool with a free counter. */
	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
		pool = cmng->pools[pool_idx];
		if (!pool)
			continue;
		cnt = TAILQ_FIRST(&pool->counters[0]);
		if (cnt)
			break;
	}
	if (!cnt) {
		/* No free counter anywhere: grow the container. */
		struct mlx5_flow_counter_pool **pools;
		uint32_t size;

		if (n_valid == cmng->n) {
			/* Resize the container pool array. */
			size = sizeof(struct mlx5_flow_counter_pool *) *
				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
			pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
					    SOCKET_ID_ANY);
			if (!pools)
				return 0;
			if (n_valid) {
				memcpy(pools, cmng->pools,
				       sizeof(struct mlx5_flow_counter_pool *) *
				       n_valid);
				mlx5_free(cmng->pools);
			}
			cmng->pools = pools;
			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
		}
		/* Allocate memory for new pool*/
		size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
		       MLX5_COUNTERS_PER_POOL;
		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
		if (!pool)
			return 0;
		pool->type |= MLX5_CNT_POOL_TYPE_EXT;
		/* Put every counter of the new pool on the free list. */
		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
			cnt = MLX5_POOL_GET_CNT(pool, i);
			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
		}
		cnt = MLX5_POOL_GET_CNT(pool, 0);
		cmng->pools[n_valid] = pool;
		pool_idx = n_valid;
		cmng->n_valid++;
	}
	/* Encode pool index + in-pool index into the public counter index. */
	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
	cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
	if (shared) {
		data.dword = cnt_idx;
		if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data))
			return 0;
		cnt->shared_info.id = id;
		cnt_idx |= MLX5_CNT_SHARED_OFFSET;
	}
	cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
	cnt->hits = 0;
	cnt->bytes = 0;
	/* Create counter with Verbs. */
	ret = flow_verbs_counter_create(dev, cnt_ext);
	if (!ret) {
		/* Success: take the counter off the free list. */
		TAILQ_REMOVE(&pool->counters[0], cnt, next);
		return cnt_idx;
	}
	/*
	 * Some error occurred in Verbs library.
	 * NOTE(review): for a shared counter the l3t entry set above is not
	 * cleared on this failure path, so a later lookup could return an
	 * index whose Verbs counter was never created — verify upstream.
	 */
	rte_errno = -ret;
	return 0;
}
337
338 /**
339  * Release a flow counter.
340  *
341  * @param[in] dev
342  *   Pointer to the Ethernet device structure.
343  * @param[in] counter
344  *   Index to the counter handler.
345  */
static void
flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt;
	struct mlx5_flow_counter_ext *cnt_ext;

	cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool);
	/*
	 * A shared counter is only really released when its reference
	 * in the id table drops; a non-zero return from the clear call
	 * means other users remain, so keep the Verbs object alive.
	 */
	if (IS_SHARED_CNT(counter) &&
	    mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
		return;
	cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
	/* Destroy the underlying Verbs counter object, API-version specific. */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
	cnt_ext->cs = NULL;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
	cnt_ext->cs = NULL;
#endif
	(void)cnt_ext;
	/* Return the counter to the pool free list. */
	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
}
369
370 /**
371  * Query a flow counter via Verbs library call.
372  *
373  * @see rte_flow_query()
374  * @see rte_flow_ops
375  */
static int
flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
			 struct rte_flow *flow, void *data,
			 struct rte_flow_error *error)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	if (flow->counter) {
		struct mlx5_flow_counter_pool *pool;
		struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
						(dev, flow->counter, &pool);
		struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
						(pool, cnt);
		struct rte_flow_query_count *qc = data;
		/* counters[0] = packets, counters[1] = bytes (attach order). */
		uint64_t counters[2] = {0, 0};
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
		struct ibv_query_counter_set_attr query_cs_attr = {
			.cs = cnt_ext->cs,
			.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
		};
		struct ibv_counter_set_data query_out = {
			.out = counters,
			.outlen = 2 * sizeof(uint64_t),
		};
		int err = mlx5_glue->query_counter_set(&query_cs_attr,
						       &query_out);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
		int err = mlx5_glue->query_counters
			       (cnt_ext->cs, counters,
				RTE_DIM(counters),
				IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
#endif
		if (err)
			return rte_flow_error_set
				(error, err,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				 NULL,
				 "cannot read counter");
		qc->hits_set = 1;
		qc->bytes_set = 1;
		/* Report deltas relative to the last reset baseline. */
		qc->hits = counters[0] - cnt->hits;
		qc->bytes = counters[1] - cnt->bytes;
		if (qc->reset) {
			/* Re-baseline so the next query starts from zero. */
			cnt->hits = counters[0];
			cnt->bytes = counters[1];
		}
		return 0;
	}
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "flow does not have counter");
#else
	(void)flow;
	(void)data;
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL,
				  "counters are not available");
#endif
}
437
438 /**
439  * Add a verbs item specification into @p verbs.
440  *
441  * @param[out] verbs
442  *   Pointer to verbs structure.
443  * @param[in] src
444  *   Create specification.
445  * @param[in] size
446  *   Size in bytes of the specification to copy.
447  */
448 static void
449 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
450                     void *src, unsigned int size)
451 {
452         void *dst;
453
454         if (!verbs)
455                 return;
456         MLX5_ASSERT(verbs->specs);
457         dst = (void *)(verbs->specs + verbs->size);
458         memcpy(dst, src, size);
459         ++verbs->attr.num_of_specs;
460         verbs->size += size;
461 }
462
463 /**
464  * Convert the @p item into a Verbs specification. This function assumes that
465  * the input is valid and that there is space to insert the requested item
466  * into the flow.
467  *
468  * @param[in, out] dev_flow
469  *   Pointer to dev_flow structure.
470  * @param[in] item
471  *   Item specification.
472  * @param[in] item_flags
473  *   Parsed item flags.
474  */
475 static void
476 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
477                               const struct rte_flow_item *item,
478                               uint64_t item_flags)
479 {
480         const struct rte_flow_item_eth *spec = item->spec;
481         const struct rte_flow_item_eth *mask = item->mask;
482         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
483         struct ibv_flow_spec_eth eth = {
484                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
485                 .size = size,
486         };
487
488         if (!mask)
489                 mask = &rte_flow_item_eth_mask;
490         if (spec) {
491                 unsigned int i;
492
493                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
494                         RTE_ETHER_ADDR_LEN);
495                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
496                         RTE_ETHER_ADDR_LEN);
497                 eth.val.ether_type = spec->type;
498                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
499                         RTE_ETHER_ADDR_LEN);
500                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
501                         RTE_ETHER_ADDR_LEN);
502                 eth.mask.ether_type = mask->type;
503                 /* Remove unwanted bits from values. */
504                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
505                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
506                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
507                 }
508                 eth.val.ether_type &= eth.mask.ether_type;
509         }
510         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
511 }
512
513 /**
514  * Update the VLAN tag in the Verbs Ethernet specification.
515  * This function assumes that the input is valid and there is space to add
516  * the requested item.
517  *
518  * @param[in, out] attr
519  *   Pointer to Verbs attributes structure.
520  * @param[in] eth
521  *   Verbs structure containing the VLAN information to copy.
522  */
523 static void
524 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
525                             struct ibv_flow_spec_eth *eth)
526 {
527         unsigned int i;
528         const enum ibv_flow_spec_type search = eth->type;
529         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
530                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
531
532         for (i = 0; i != attr->num_of_specs; ++i) {
533                 if (hdr->type == search) {
534                         struct ibv_flow_spec_eth *e =
535                                 (struct ibv_flow_spec_eth *)hdr;
536
537                         e->val.vlan_tag = eth->val.vlan_tag;
538                         e->mask.vlan_tag = eth->mask.vlan_tag;
539                         e->val.ether_type = eth->val.ether_type;
540                         e->mask.ether_type = eth->mask.ether_type;
541                         break;
542                 }
543                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
544         }
545 }
546
547 /**
548  * Convert the @p item into a Verbs specification. This function assumes that
549  * the input is valid and that there is space to insert the requested item
550  * into the flow.
551  *
552  * @param[in, out] dev_flow
553  *   Pointer to dev_flow structure.
554  * @param[in] item
555  *   Item specification.
556  * @param[in] item_flags
557  *   Parsed item flags.
558  */
559 static void
560 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
561                                const struct rte_flow_item *item,
562                                uint64_t item_flags)
563 {
564         const struct rte_flow_item_vlan *spec = item->spec;
565         const struct rte_flow_item_vlan *mask = item->mask;
566         unsigned int size = sizeof(struct ibv_flow_spec_eth);
567         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
568         struct ibv_flow_spec_eth eth = {
569                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
570                 .size = size,
571         };
572         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
573                                       MLX5_FLOW_LAYER_OUTER_L2;
574
575         if (!mask)
576                 mask = &rte_flow_item_vlan_mask;
577         if (spec) {
578                 eth.val.vlan_tag = spec->tci;
579                 eth.mask.vlan_tag = mask->tci;
580                 eth.val.vlan_tag &= eth.mask.vlan_tag;
581                 eth.val.ether_type = spec->inner_type;
582                 eth.mask.ether_type = mask->inner_type;
583                 eth.val.ether_type &= eth.mask.ether_type;
584         }
585         if (!(item_flags & l2m))
586                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
587         else
588                 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
589         if (!tunnel)
590                 dev_flow->handle->vf_vlan.tag =
591                         rte_be_to_cpu_16(spec->tci) & 0x0fff;
592 }
593
594 /**
595  * Convert the @p item into a Verbs specification. This function assumes that
596  * the input is valid and that there is space to insert the requested item
597  * into the flow.
598  *
599  * @param[in, out] dev_flow
600  *   Pointer to dev_flow structure.
601  * @param[in] item
602  *   Item specification.
603  * @param[in] item_flags
604  *   Parsed item flags.
605  */
606 static void
607 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
608                                const struct rte_flow_item *item,
609                                uint64_t item_flags)
610 {
611         const struct rte_flow_item_ipv4 *spec = item->spec;
612         const struct rte_flow_item_ipv4 *mask = item->mask;
613         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
614         struct ibv_flow_spec_ipv4_ext ipv4 = {
615                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
616                 .size = size,
617         };
618
619         if (!mask)
620                 mask = &rte_flow_item_ipv4_mask;
621         if (spec) {
622                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
623                         .src_ip = spec->hdr.src_addr,
624                         .dst_ip = spec->hdr.dst_addr,
625                         .proto = spec->hdr.next_proto_id,
626                         .tos = spec->hdr.type_of_service,
627                 };
628                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
629                         .src_ip = mask->hdr.src_addr,
630                         .dst_ip = mask->hdr.dst_addr,
631                         .proto = mask->hdr.next_proto_id,
632                         .tos = mask->hdr.type_of_service,
633                 };
634                 /* Remove unwanted bits from values. */
635                 ipv4.val.src_ip &= ipv4.mask.src_ip;
636                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
637                 ipv4.val.proto &= ipv4.mask.proto;
638                 ipv4.val.tos &= ipv4.mask.tos;
639         }
640         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
641 }
642
643 /**
644  * Convert the @p item into a Verbs specification. This function assumes that
645  * the input is valid and that there is space to insert the requested item
646  * into the flow.
647  *
648  * @param[in, out] dev_flow
649  *   Pointer to dev_flow structure.
650  * @param[in] item
651  *   Item specification.
652  * @param[in] item_flags
653  *   Parsed item flags.
654  */
static void
flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
			       const struct rte_flow_item *item,
			       uint64_t item_flags)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		/*
		 * Split the 32-bit version/TC/flow-label word into the
		 * separate flow-label and traffic-class Verbs fields.
		 */
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
					 RTE_IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
					  RTE_IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
}
708
709 /**
710  * Convert the @p item into a Verbs specification. This function assumes that
711  * the input is valid and that there is space to insert the requested item
712  * into the flow.
713  *
714  * @param[in, out] dev_flow
715  *   Pointer to dev_flow structure.
716  * @param[in] item
717  *   Item specification.
718  * @param[in] item_flags
719  *   Parsed item flags.
720  */
721 static void
722 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
723                               const struct rte_flow_item *item,
724                               uint64_t item_flags __rte_unused)
725 {
726         const struct rte_flow_item_tcp *spec = item->spec;
727         const struct rte_flow_item_tcp *mask = item->mask;
728         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
729         struct ibv_flow_spec_tcp_udp tcp = {
730                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
731                 .size = size,
732         };
733
734         if (!mask)
735                 mask = &rte_flow_item_tcp_mask;
736         if (spec) {
737                 tcp.val.dst_port = spec->hdr.dst_port;
738                 tcp.val.src_port = spec->hdr.src_port;
739                 tcp.mask.dst_port = mask->hdr.dst_port;
740                 tcp.mask.src_port = mask->hdr.src_port;
741                 /* Remove unwanted bits from values. */
742                 tcp.val.src_port &= tcp.mask.src_port;
743                 tcp.val.dst_port &= tcp.mask.dst_port;
744         }
745         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
746 }
747
748 /**
749  * Convert the @p item into a Verbs specification. This function assumes that
750  * the input is valid and that there is space to insert the requested item
751  * into the flow.
752  *
753  * @param[in, out] dev_flow
754  *   Pointer to dev_flow structure.
755  * @param[in] item
756  *   Item specification.
757  * @param[in] item_flags
758  *   Parsed item flags.
759  */
760 static void
761 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
762                               const struct rte_flow_item *item,
763                               uint64_t item_flags __rte_unused)
764 {
765         const struct rte_flow_item_udp *spec = item->spec;
766         const struct rte_flow_item_udp *mask = item->mask;
767         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
768         struct ibv_flow_spec_tcp_udp udp = {
769                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
770                 .size = size,
771         };
772
773         if (!mask)
774                 mask = &rte_flow_item_udp_mask;
775         if (spec) {
776                 udp.val.dst_port = spec->hdr.dst_port;
777                 udp.val.src_port = spec->hdr.src_port;
778                 udp.mask.dst_port = mask->hdr.dst_port;
779                 udp.mask.src_port = mask->hdr.src_port;
780                 /* Remove unwanted bits from values. */
781                 udp.val.src_port &= udp.mask.src_port;
782                 udp.val.dst_port &= udp.mask.dst_port;
783         }
784         item++;
785         while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
786                 item++;
787         if (!(udp.val.dst_port & udp.mask.dst_port)) {
788                 switch ((item)->type) {
789                 case RTE_FLOW_ITEM_TYPE_VXLAN:
790                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
791                         udp.mask.dst_port = 0xffff;
792                         break;
793                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
794                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
795                         udp.mask.dst_port = 0xffff;
796                         break;
797                 case RTE_FLOW_ITEM_TYPE_MPLS:
798                         udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
799                         udp.mask.dst_port = 0xffff;
800                         break;
801                 default:
802                         break;
803                 }
804         }
805
806         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
807 }
808
809 /**
810  * Convert the @p item into a Verbs specification. This function assumes that
811  * the input is valid and that there is space to insert the requested item
812  * into the flow.
813  *
814  * @param[in, out] dev_flow
815  *   Pointer to dev_flow structure.
816  * @param[in] item
817  *   Item specification.
818  * @param[in] item_flags
819  *   Parsed item flags.
820  */
821 static void
822 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
823                                 const struct rte_flow_item *item,
824                                 uint64_t item_flags __rte_unused)
825 {
826         const struct rte_flow_item_vxlan *spec = item->spec;
827         const struct rte_flow_item_vxlan *mask = item->mask;
828         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
829         struct ibv_flow_spec_tunnel vxlan = {
830                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
831                 .size = size,
832         };
833         union vni {
834                 uint32_t vlan_id;
835                 uint8_t vni[4];
836         } id = { .vlan_id = 0, };
837
838         if (!mask)
839                 mask = &rte_flow_item_vxlan_mask;
840         if (spec) {
841                 memcpy(&id.vni[1], spec->vni, 3);
842                 vxlan.val.tunnel_id = id.vlan_id;
843                 memcpy(&id.vni[1], mask->vni, 3);
844                 vxlan.mask.tunnel_id = id.vlan_id;
845                 /* Remove unwanted bits from values. */
846                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
847         }
848         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
849 }
850
851 /**
852  * Convert the @p item into a Verbs specification. This function assumes that
853  * the input is valid and that there is space to insert the requested item
854  * into the flow.
855  *
856  * @param[in, out] dev_flow
857  *   Pointer to dev_flow structure.
858  * @param[in] item
859  *   Item specification.
860  * @param[in] item_flags
861  *   Parsed item flags.
862  */
863 static void
864 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
865                                     const struct rte_flow_item *item,
866                                     uint64_t item_flags __rte_unused)
867 {
868         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
869         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
870         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
871         struct ibv_flow_spec_tunnel vxlan_gpe = {
872                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
873                 .size = size,
874         };
875         union vni {
876                 uint32_t vlan_id;
877                 uint8_t vni[4];
878         } id = { .vlan_id = 0, };
879
880         if (!mask)
881                 mask = &rte_flow_item_vxlan_gpe_mask;
882         if (spec) {
883                 memcpy(&id.vni[1], spec->vni, 3);
884                 vxlan_gpe.val.tunnel_id = id.vlan_id;
885                 memcpy(&id.vni[1], mask->vni, 3);
886                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
887                 /* Remove unwanted bits from values. */
888                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
889         }
890         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
891 }
892
893 /**
894  * Update the protocol in Verbs IPv4/IPv6 spec.
895  *
896  * @param[in, out] attr
897  *   Pointer to Verbs attributes structure.
898  * @param[in] search
899  *   Specification type to search in order to update the IP protocol.
900  * @param[in] protocol
901  *   Protocol value to set if none is present in the specification.
902  */
903 static void
904 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
905                                        enum ibv_flow_spec_type search,
906                                        uint8_t protocol)
907 {
908         unsigned int i;
909         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
910                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
911
912         if (!attr)
913                 return;
914         for (i = 0; i != attr->num_of_specs; ++i) {
915                 if (hdr->type == search) {
916                         union {
917                                 struct ibv_flow_spec_ipv4_ext *ipv4;
918                                 struct ibv_flow_spec_ipv6 *ipv6;
919                         } ip;
920
921                         switch (search) {
922                         case IBV_FLOW_SPEC_IPV4_EXT:
923                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
924                                 if (!ip.ipv4->val.proto) {
925                                         ip.ipv4->val.proto = protocol;
926                                         ip.ipv4->mask.proto = 0xff;
927                                 }
928                                 break;
929                         case IBV_FLOW_SPEC_IPV6:
930                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
931                                 if (!ip.ipv6->val.next_hdr) {
932                                         ip.ipv6->val.next_hdr = protocol;
933                                         ip.ipv6->mask.next_hdr = 0xff;
934                                 }
935                                 break;
936                         default:
937                                 break;
938                         }
939                         break;
940                 }
941                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
942         }
943 }
944
945 /**
946  * Convert the @p item into a Verbs specification. This function assumes that
947  * the input is valid and that there is space to insert the requested item
948  * into the flow.
949  *
950  * @param[in, out] dev_flow
951  *   Pointer to dev_flow structure.
952  * @param[in] item
953  *   Item specification.
954  * @param[in] item_flags
955  *   Parsed item flags.
956  */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
                              const struct rte_flow_item *item __rte_unused,
                              uint64_t item_flags)
{
        struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
        /*
         * Without MPLS support in rdma-core there is no dedicated GRE spec;
         * fall back to matching GRE as a generic (VXLAN) tunnel spec.
         */
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        struct ibv_flow_spec_tunnel tunnel = {
                .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
#else
        const struct rte_flow_item_gre *spec = item->spec;
        const struct rte_flow_item_gre *mask = item->mask;
        unsigned int size = sizeof(struct ibv_flow_spec_gre);
        struct ibv_flow_spec_gre tunnel = {
                .type = IBV_FLOW_SPEC_GRE,
                .size = size,
        };

        if (!mask)
                mask = &rte_flow_item_gre_mask;
        if (spec) {
                tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
                tunnel.val.protocol = spec->protocol;
                tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
                tunnel.mask.protocol = mask->protocol;
                /* Remove unwanted bits from values. */
                tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
                tunnel.val.protocol &= tunnel.mask.protocol;
                /*
                 * NOTE(review): val.key/mask.key are never assigned from the
                 * item here, so this masks zero against zero — presumably
                 * harmless; confirm whether GRE key matching is intended.
                 */
                tunnel.val.key &= tunnel.mask.key;
        }
#endif
        /*
         * GRE matching on this device requires the enclosing IP spec to
         * carry the GRE protocol; patch it in if the flow left it unset.
         */
        if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
                flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
                                                       IBV_FLOW_SPEC_IPV4_EXT,
                                                       IPPROTO_GRE);
        else
                flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
                                                       IBV_FLOW_SPEC_IPV6,
                                                       IPPROTO_GRE);
        flow_verbs_spec_add(verbs, &tunnel, size);
}
1001
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
1014 static void
1015 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
1016                                const struct rte_flow_item *item __rte_unused,
1017                                uint64_t item_flags __rte_unused)
1018 {
1019 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1020         const struct rte_flow_item_mpls *spec = item->spec;
1021         const struct rte_flow_item_mpls *mask = item->mask;
1022         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1023         struct ibv_flow_spec_mpls mpls = {
1024                 .type = IBV_FLOW_SPEC_MPLS,
1025                 .size = size,
1026         };
1027
1028         if (!mask)
1029                 mask = &rte_flow_item_mpls_mask;
1030         if (spec) {
1031                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1032                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1033                 /* Remove unwanted bits from values.  */
1034                 mpls.val.label &= mpls.mask.label;
1035         }
1036         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
1037 #endif
1038 }
1039
1040 /**
1041  * Convert the @p action into a Verbs specification. This function assumes that
1042  * the input is valid and that there is space to insert the requested action
1043  * into the flow.
1044  *
1045  * @param[in] dev_flow
1046  *   Pointer to mlx5_flow.
1047  * @param[in] action
1048  *   Action configuration.
1049  */
1050 static void
1051 flow_verbs_translate_action_drop
1052         (struct mlx5_flow *dev_flow,
1053          const struct rte_flow_action *action __rte_unused)
1054 {
1055         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1056         struct ibv_flow_spec_action_drop drop = {
1057                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1058                         .size = size,
1059         };
1060
1061         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
1062 }
1063
1064 /**
1065  * Convert the @p action into a Verbs specification. This function assumes that
1066  * the input is valid and that there is space to insert the requested action
1067  * into the flow.
1068  *
1069  * @param[in] rss_desc
1070  *   Pointer to mlx5_flow_rss_desc.
1071  * @param[in] action
1072  *   Action configuration.
1073  */
1074 static void
1075 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
1076                                   const struct rte_flow_action *action)
1077 {
1078         const struct rte_flow_action_queue *queue = action->conf;
1079
1080         rss_desc->queue[0] = queue->index;
1081         rss_desc->queue_num = 1;
1082 }
1083
1084 /**
1085  * Convert the @p action into a Verbs specification. This function assumes that
1086  * the input is valid and that there is space to insert the requested action
1087  * into the flow.
1088  *
1089  * @param[in] rss_desc
1090  *   Pointer to mlx5_flow_rss_desc.
1091  * @param[in] action
1092  *   Action configuration.
1093  */
1094 static void
1095 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
1096                                 const struct rte_flow_action *action)
1097 {
1098         const struct rte_flow_action_rss *rss = action->conf;
1099         const uint8_t *rss_key;
1100
1101         memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1102         rss_desc->queue_num = rss->queue_num;
1103         /* NULL RSS key indicates default RSS key. */
1104         rss_key = !rss->key ? rss_hash_default_key : rss->key;
1105         memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1106         /*
1107          * rss->level and rss.types should be set in advance when expanding
1108          * items for RSS.
1109          */
1110 }
1111
1112 /**
1113  * Convert the @p action into a Verbs specification. This function assumes that
1114  * the input is valid and that there is space to insert the requested action
1115  * into the flow.
1116  *
1117  * @param[in] dev_flow
1118  *   Pointer to mlx5_flow.
1119  * @param[in] action
1120  *   Action configuration.
1121  */
1122 static void
1123 flow_verbs_translate_action_flag
1124         (struct mlx5_flow *dev_flow,
1125          const struct rte_flow_action *action __rte_unused)
1126 {
1127         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1128         struct ibv_flow_spec_action_tag tag = {
1129                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1130                 .size = size,
1131                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1132         };
1133
1134         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1135 }
1136
1137 /**
1138  * Convert the @p action into a Verbs specification. This function assumes that
1139  * the input is valid and that there is space to insert the requested action
1140  * into the flow.
1141  *
1142  * @param[in] dev_flow
1143  *   Pointer to mlx5_flow.
1144  * @param[in] action
1145  *   Action configuration.
1146  */
1147 static void
1148 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1149                                  const struct rte_flow_action *action)
1150 {
1151         const struct rte_flow_action_mark *mark = action->conf;
1152         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1153         struct ibv_flow_spec_action_tag tag = {
1154                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1155                 .size = size,
1156                 .tag_id = mlx5_flow_mark_set(mark->id),
1157         };
1158
1159         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1160 }
1161
/**
 * Convert the @p action into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested action
 * into the flow.
 *
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[in] action
 *   Action configuration.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
                                  const struct rte_flow_action *action,
                                  struct rte_eth_dev *dev,
                                  struct rte_flow_error *error)
{
        const struct rte_flow_action_count *count = action->conf;
        struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
        defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
        struct mlx5_flow_counter_pool *pool;
        struct mlx5_flow_counter *cnt = NULL;
        struct mlx5_flow_counter_ext *cnt_ext;
        unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
        struct ibv_flow_spec_counter_action counter = {
                .type = IBV_FLOW_SPEC_ACTION_COUNT,
                .size = size,
        };
#endif

        /* Allocate a counter for the flow if it does not have one yet. */
        if (!flow->counter) {
                flow->counter = flow_verbs_counter_new(dev, count->shared,
                                                       count->id);
                if (!flow->counter)
                        return rte_flow_error_set(error, rte_errno,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
                                                  action,
                                                  "cannot get counter"
                                                  " context.");
        }
        /*
         * Attach the counter to the Verbs flow; the spec layout depends on
         * which counters-set API version rdma-core provides. With neither
         * version available the function only allocates the counter above.
         */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
        cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
        cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
        counter.counter_set_handle = cnt_ext->cs->handle;
        flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
        cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
        cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
        counter.counters = cnt_ext->cs;
        flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
        return 0;
}
1222
1223 /**
1224  * Internal validation function. For validating both actions and items.
1225  *
1226  * @param[in] dev
1227  *   Pointer to the Ethernet device structure.
1228  * @param[in] attr
1229  *   Pointer to the flow attributes.
1230  * @param[in] items
1231  *   Pointer to the list of items.
1232  * @param[in] actions
1233  *   Pointer to the list of actions.
1234  * @param[in] external
1235  *   This flow rule is created by request external to PMD.
1236  * @param[in] hairpin
1237  *   Number of hairpin TX actions, 0 means classic flow.
1238  * @param[out] error
1239  *   Pointer to the error structure.
1240  *
1241  * @return
1242  *   0 on success, a negative errno value otherwise and rte_errno is set.
1243  */
1244 static int
1245 flow_verbs_validate(struct rte_eth_dev *dev,
1246                     const struct rte_flow_attr *attr,
1247                     const struct rte_flow_item items[],
1248                     const struct rte_flow_action actions[],
1249                     bool external __rte_unused,
1250                     int hairpin __rte_unused,
1251                     struct rte_flow_error *error)
1252 {
1253         int ret;
1254         uint64_t action_flags = 0;
1255         uint64_t item_flags = 0;
1256         uint64_t last_item = 0;
1257         uint8_t next_protocol = 0xff;
1258         uint16_t ether_type = 0;
1259
1260         if (items == NULL)
1261                 return -1;
1262         ret = mlx5_flow_validate_attributes(dev, attr, error);
1263         if (ret < 0)
1264                 return ret;
1265         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1266                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1267                 int ret = 0;
1268
1269                 switch (items->type) {
1270                 case RTE_FLOW_ITEM_TYPE_VOID:
1271                         break;
1272                 case RTE_FLOW_ITEM_TYPE_ETH:
1273                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1274                                                           error);
1275                         if (ret < 0)
1276                                 return ret;
1277                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1278                                              MLX5_FLOW_LAYER_OUTER_L2;
1279                         if (items->mask != NULL && items->spec != NULL) {
1280                                 ether_type =
1281                                         ((const struct rte_flow_item_eth *)
1282                                          items->spec)->type;
1283                                 ether_type &=
1284                                         ((const struct rte_flow_item_eth *)
1285                                          items->mask)->type;
1286                                 ether_type = rte_be_to_cpu_16(ether_type);
1287                         } else {
1288                                 ether_type = 0;
1289                         }
1290                         break;
1291                 case RTE_FLOW_ITEM_TYPE_VLAN:
1292                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1293                                                            dev, error);
1294                         if (ret < 0)
1295                                 return ret;
1296                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1297                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1298                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1299                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1300                         if (items->mask != NULL && items->spec != NULL) {
1301                                 ether_type =
1302                                         ((const struct rte_flow_item_vlan *)
1303                                          items->spec)->inner_type;
1304                                 ether_type &=
1305                                         ((const struct rte_flow_item_vlan *)
1306                                          items->mask)->inner_type;
1307                                 ether_type = rte_be_to_cpu_16(ether_type);
1308                         } else {
1309                                 ether_type = 0;
1310                         }
1311                         break;
1312                 case RTE_FLOW_ITEM_TYPE_IPV4:
1313                         ret = mlx5_flow_validate_item_ipv4
1314                                                 (items, item_flags,
1315                                                  last_item, ether_type, NULL,
1316                                                  MLX5_ITEM_RANGE_NOT_ACCEPTED,
1317                                                  error);
1318                         if (ret < 0)
1319                                 return ret;
1320                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1321                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1322                         if (items->mask != NULL &&
1323                             ((const struct rte_flow_item_ipv4 *)
1324                              items->mask)->hdr.next_proto_id) {
1325                                 next_protocol =
1326                                         ((const struct rte_flow_item_ipv4 *)
1327                                          (items->spec))->hdr.next_proto_id;
1328                                 next_protocol &=
1329                                         ((const struct rte_flow_item_ipv4 *)
1330                                          (items->mask))->hdr.next_proto_id;
1331                         } else {
1332                                 /* Reset for inner layer. */
1333                                 next_protocol = 0xff;
1334                         }
1335                         break;
1336                 case RTE_FLOW_ITEM_TYPE_IPV6:
1337                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1338                                                            last_item,
1339                                                            ether_type, NULL,
1340                                                            error);
1341                         if (ret < 0)
1342                                 return ret;
1343                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1344                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1345                         if (items->mask != NULL &&
1346                             ((const struct rte_flow_item_ipv6 *)
1347                              items->mask)->hdr.proto) {
1348                                 next_protocol =
1349                                         ((const struct rte_flow_item_ipv6 *)
1350                                          items->spec)->hdr.proto;
1351                                 next_protocol &=
1352                                         ((const struct rte_flow_item_ipv6 *)
1353                                          items->mask)->hdr.proto;
1354                         } else {
1355                                 /* Reset for inner layer. */
1356                                 next_protocol = 0xff;
1357                         }
1358                         break;
1359                 case RTE_FLOW_ITEM_TYPE_UDP:
1360                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1361                                                           next_protocol,
1362                                                           error);
1363                         if (ret < 0)
1364                                 return ret;
1365                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1366                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1367                         break;
1368                 case RTE_FLOW_ITEM_TYPE_TCP:
1369                         ret = mlx5_flow_validate_item_tcp
1370                                                 (items, item_flags,
1371                                                  next_protocol,
1372                                                  &rte_flow_item_tcp_mask,
1373                                                  error);
1374                         if (ret < 0)
1375                                 return ret;
1376                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1377                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1378                         break;
1379                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1380                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1381                                                             error);
1382                         if (ret < 0)
1383                                 return ret;
1384                         last_item = MLX5_FLOW_LAYER_VXLAN;
1385                         break;
1386                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1387                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1388                                                                 item_flags,
1389                                                                 dev, error);
1390                         if (ret < 0)
1391                                 return ret;
1392                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1393                         break;
1394                 case RTE_FLOW_ITEM_TYPE_GRE:
1395                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1396                                                           next_protocol, error);
1397                         if (ret < 0)
1398                                 return ret;
1399                         last_item = MLX5_FLOW_LAYER_GRE;
1400                         break;
1401                 case RTE_FLOW_ITEM_TYPE_MPLS:
1402                         ret = mlx5_flow_validate_item_mpls(dev, items,
1403                                                            item_flags,
1404                                                            last_item, error);
1405                         if (ret < 0)
1406                                 return ret;
1407                         last_item = MLX5_FLOW_LAYER_MPLS;
1408                         break;
1409                 default:
1410                         return rte_flow_error_set(error, ENOTSUP,
1411                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1412                                                   NULL, "item not supported");
1413                 }
1414                 item_flags |= last_item;
1415         }
1416         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1417                 switch (actions->type) {
1418                 case RTE_FLOW_ACTION_TYPE_VOID:
1419                         break;
1420                 case RTE_FLOW_ACTION_TYPE_FLAG:
1421                         ret = mlx5_flow_validate_action_flag(action_flags,
1422                                                              attr,
1423                                                              error);
1424                         if (ret < 0)
1425                                 return ret;
1426                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1427                         break;
1428                 case RTE_FLOW_ACTION_TYPE_MARK:
1429                         ret = mlx5_flow_validate_action_mark(actions,
1430                                                              action_flags,
1431                                                              attr,
1432                                                              error);
1433                         if (ret < 0)
1434                                 return ret;
1435                         action_flags |= MLX5_FLOW_ACTION_MARK;
1436                         break;
1437                 case RTE_FLOW_ACTION_TYPE_DROP:
1438                         ret = mlx5_flow_validate_action_drop(action_flags,
1439                                                              attr,
1440                                                              error);
1441                         if (ret < 0)
1442                                 return ret;
1443                         action_flags |= MLX5_FLOW_ACTION_DROP;
1444                         break;
1445                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1446                         ret = mlx5_flow_validate_action_queue(actions,
1447                                                               action_flags, dev,
1448                                                               attr,
1449                                                               error);
1450                         if (ret < 0)
1451                                 return ret;
1452                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1453                         break;
1454                 case RTE_FLOW_ACTION_TYPE_RSS:
1455                         ret = mlx5_flow_validate_action_rss(actions,
1456                                                             action_flags, dev,
1457                                                             attr, item_flags,
1458                                                             error);
1459                         if (ret < 0)
1460                                 return ret;
1461                         action_flags |= MLX5_FLOW_ACTION_RSS;
1462                         break;
1463                 case RTE_FLOW_ACTION_TYPE_COUNT:
1464                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1465                         if (ret < 0)
1466                                 return ret;
1467                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1468                         break;
1469                 default:
1470                         return rte_flow_error_set(error, ENOTSUP,
1471                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1472                                                   actions,
1473                                                   "action not supported");
1474                 }
1475         }
1476         /*
1477          * Validate the drop action mutual exclusion with other actions.
1478          * Drop action is mutually-exclusive with any other action, except for
1479          * Count action.
1480          */
1481         if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1482             (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1483                 return rte_flow_error_set(error, EINVAL,
1484                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1485                                           "Drop action is mutually-exclusive "
1486                                           "with any other action, except for "
1487                                           "Count action");
1488         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1489                 return rte_flow_error_set(error, EINVAL,
1490                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1491                                           "no fate action is found");
1492         return 0;
1493 }
1494
1495 /**
1496  * Calculate the required bytes that are needed for the action part of the verbs
1497  * flow.
1498  *
1499  * @param[in] actions
1500  *   Pointer to the list of actions.
1501  *
1502  * @return
1503  *   The size of the memory needed for all actions.
1504  */
1505 static int
1506 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1507 {
1508         int size = 0;
1509
1510         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1511                 switch (actions->type) {
1512                 case RTE_FLOW_ACTION_TYPE_VOID:
1513                         break;
1514                 case RTE_FLOW_ACTION_TYPE_FLAG:
1515                         size += sizeof(struct ibv_flow_spec_action_tag);
1516                         break;
1517                 case RTE_FLOW_ACTION_TYPE_MARK:
1518                         size += sizeof(struct ibv_flow_spec_action_tag);
1519                         break;
1520                 case RTE_FLOW_ACTION_TYPE_DROP:
1521                         size += sizeof(struct ibv_flow_spec_action_drop);
1522                         break;
1523                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1524                         break;
1525                 case RTE_FLOW_ACTION_TYPE_RSS:
1526                         break;
1527                 case RTE_FLOW_ACTION_TYPE_COUNT:
1528 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1529         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1530                         size += sizeof(struct ibv_flow_spec_counter_action);
1531 #endif
1532                         break;
1533                 default:
1534                         break;
1535                 }
1536         }
1537         return size;
1538 }
1539
1540 /**
1541  * Calculate the required bytes that are needed for the item part of the verbs
1542  * flow.
1543  *
1544  * @param[in] items
1545  *   Pointer to the list of items.
1546  *
1547  * @return
1548  *   The size of the memory needed for all items.
1549  */
1550 static int
1551 flow_verbs_get_items_size(const struct rte_flow_item items[])
1552 {
1553         int size = 0;
1554
1555         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1556                 switch (items->type) {
1557                 case RTE_FLOW_ITEM_TYPE_VOID:
1558                         break;
1559                 case RTE_FLOW_ITEM_TYPE_ETH:
1560                         size += sizeof(struct ibv_flow_spec_eth);
1561                         break;
1562                 case RTE_FLOW_ITEM_TYPE_VLAN:
1563                         size += sizeof(struct ibv_flow_spec_eth);
1564                         break;
1565                 case RTE_FLOW_ITEM_TYPE_IPV4:
1566                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1567                         break;
1568                 case RTE_FLOW_ITEM_TYPE_IPV6:
1569                         size += sizeof(struct ibv_flow_spec_ipv6);
1570                         break;
1571                 case RTE_FLOW_ITEM_TYPE_UDP:
1572                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1573                         break;
1574                 case RTE_FLOW_ITEM_TYPE_TCP:
1575                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1576                         break;
1577                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1578                         size += sizeof(struct ibv_flow_spec_tunnel);
1579                         break;
1580                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1581                         size += sizeof(struct ibv_flow_spec_tunnel);
1582                         break;
1583 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1584                 case RTE_FLOW_ITEM_TYPE_GRE:
1585                         size += sizeof(struct ibv_flow_spec_gre);
1586                         break;
1587                 case RTE_FLOW_ITEM_TYPE_MPLS:
1588                         size += sizeof(struct ibv_flow_spec_mpls);
1589                         break;
1590 #else
1591                 case RTE_FLOW_ITEM_TYPE_GRE:
1592                         size += sizeof(struct ibv_flow_spec_tunnel);
1593                         break;
1594 #endif
1595                 default:
1596                         break;
1597                 }
1598         }
1599         return size;
1600 }
1601
1602 /**
1603  * Internal preparation function. Allocate mlx5_flow with the required size.
1604  * The required size is calculate based on the actions and items. This function
1605  * also returns the detected actions and items for later use.
1606  *
1607  * @param[in] dev
1608  *   Pointer to Ethernet device.
1609  * @param[in] attr
1610  *   Pointer to the flow attributes.
1611  * @param[in] items
1612  *   Pointer to the list of items.
1613  * @param[in] actions
1614  *   Pointer to the list of actions.
1615  * @param[out] error
1616  *   Pointer to the error structure.
1617  *
1618  * @return
1619  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1620  *   is set.
1621  */
1622 static struct mlx5_flow *
1623 flow_verbs_prepare(struct rte_eth_dev *dev,
1624                    const struct rte_flow_attr *attr __rte_unused,
1625                    const struct rte_flow_item items[],
1626                    const struct rte_flow_action actions[],
1627                    struct rte_flow_error *error)
1628 {
1629         size_t size = 0;
1630         uint32_t handle_idx = 0;
1631         struct mlx5_flow *dev_flow;
1632         struct mlx5_flow_handle *dev_handle;
1633         struct mlx5_priv *priv = dev->data->dev_private;
1634
1635         size += flow_verbs_get_actions_size(actions);
1636         size += flow_verbs_get_items_size(items);
1637         if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1638                 rte_flow_error_set(error, E2BIG,
1639                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1640                                    "Verbs spec/action size too large");
1641                 return NULL;
1642         }
1643         /* In case of corrupting the memory. */
1644         if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1645                 rte_flow_error_set(error, ENOSPC,
1646                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1647                                    "not free temporary device flow");
1648                 return NULL;
1649         }
1650         dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1651                                    &handle_idx);
1652         if (!dev_handle) {
1653                 rte_flow_error_set(error, ENOMEM,
1654                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1655                                    "not enough memory to create flow handle");
1656                 return NULL;
1657         }
1658         /* No multi-thread supporting. */
1659         dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1660         dev_flow->handle = dev_handle;
1661         dev_flow->handle_idx = handle_idx;
1662         /* Memcpy is used, only size needs to be cleared to 0. */
1663         dev_flow->verbs.size = 0;
1664         dev_flow->verbs.attr.num_of_specs = 0;
1665         dev_flow->ingress = attr->ingress;
1666         dev_flow->hash_fields = 0;
1667         /* Need to set transfer attribute: not supported in Verbs mode. */
1668         return dev_flow;
1669 }
1670
/**
 * Fill the flow with verb spec.
 *
 * Walks the action list first (building action specs and flags), then the
 * item list (building match specs, accumulating layer flags, RSS hash
 * fields and the flow sub-priority), and finally stores the adjusted
 * priority and port into the verbs flow attribute.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] dev_flow
 *   Pointer to the mlx5 flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, else a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
		     struct mlx5_flow *dev_flow,
		     const struct rte_flow_attr *attr,
		     const struct rte_flow_item items[],
		     const struct rte_flow_action actions[],
		     struct rte_flow_error *error)
{
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	uint64_t priority = attr->priority;
	uint32_t subpriority = 0;
	struct mlx5_priv *priv = dev->data->dev_private;
	/* Index 0 or 1 selected by whether this is a nested flow
	 * (!!priv->flow_nested_idx). */
	struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
					      priv->rss_desc)
					      [!!priv->flow_nested_idx];

	/* Reserved priority means "lowest": use the last configured level. */
	if (priority == MLX5_FLOW_PRIO_RSVD)
		priority = priv->config.flow_prio - 1;
	/* Pass 1: translate every action into its verbs spec / RSS desc. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		int ret;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			flow_verbs_translate_action_flag(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			dev_flow->handle->mark = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			flow_verbs_translate_action_mark(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_MARK;
			dev_flow->handle->mark = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow_verbs_translate_action_drop(dev_flow, actions);
			action_flags |= MLX5_FLOW_ACTION_DROP;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			flow_verbs_translate_action_queue(rss_desc, actions);
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			flow_verbs_translate_action_rss(rss_desc, actions);
			action_flags |= MLX5_FLOW_ACTION_RSS;
			dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			/* The only action translation that can fail. */
			ret = flow_verbs_translate_action_count(dev_flow,
								actions,
								dev, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	dev_flow->act_flags = action_flags;
	/* Pass 2: translate every item; each deeper layer overwrites
	 * 'subpriority' so the last (innermost) layer wins. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		/* Layers seen after a tunnel item count as inner layers. */
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			flow_verbs_translate_item_eth(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					       MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			flow_verbs_translate_item_vlan(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L2;
			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
						MLX5_FLOW_LAYER_INNER_VLAN) :
					       (MLX5_FLOW_LAYER_OUTER_L2 |
						MLX5_FLOW_LAYER_OUTER_VLAN);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			flow_verbs_translate_item_ipv4(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel,
					 MLX5_IPV4_LAYER_TYPES,
					 MLX5_IPV4_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			flow_verbs_translate_item_ipv6(dev_flow, items,
						       item_flags);
			subpriority = MLX5_PRIORITY_MAP_L3;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel,
					 MLX5_IPV6_LAYER_TYPES,
					 MLX5_IPV6_IBV_RX_HASH);
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			flow_verbs_translate_item_tcp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel, ETH_RSS_TCP,
					 (IBV_RX_HASH_SRC_PORT_TCP |
					  IBV_RX_HASH_DST_PORT_TCP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			flow_verbs_translate_item_udp(dev_flow, items,
						      item_flags);
			subpriority = MLX5_PRIORITY_MAP_L4;
			dev_flow->hash_fields |=
				mlx5_flow_hashfields_adjust
					(rss_desc, tunnel, ETH_RSS_UDP,
					 (IBV_RX_HASH_SRC_PORT_UDP |
					  IBV_RX_HASH_DST_PORT_UDP));
			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			flow_verbs_translate_item_vxlan(dev_flow, items,
							item_flags);
			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
			item_flags |= MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
							    item_flags);
			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			flow_verbs_translate_item_gre(dev_flow, items,
						      item_flags);
			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
			item_flags |= MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			flow_verbs_translate_item_mpls(dev_flow, items,
						       item_flags);
			subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
			item_flags |= MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL,
						  "item not supported");
		}
	}
	dev_flow->handle->layers = item_flags;
	/* Other members of attr will be ignored. */
	dev_flow->verbs.attr.priority =
		mlx5_flow_adjust_priority(dev, priority, subpriority);
	dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
	return 0;
}
1864
1865 /**
1866  * Remove the flow from the NIC but keeps it in memory.
1867  *
1868  * @param[in] dev
1869  *   Pointer to the Ethernet device structure.
1870  * @param[in, out] flow
1871  *   Pointer to flow structure.
1872  */
1873 static void
1874 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1875 {
1876         struct mlx5_priv *priv = dev->data->dev_private;
1877         struct mlx5_flow_handle *handle;
1878         uint32_t handle_idx;
1879
1880         if (!flow)
1881                 return;
1882         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1883                        handle_idx, handle, next) {
1884                 if (handle->drv_flow) {
1885                         claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
1886                         handle->drv_flow = NULL;
1887                 }
1888                 /* hrxq is union, don't touch it only the flag is set. */
1889                 if (handle->rix_hrxq) {
1890                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1891                                 mlx5_drop_action_destroy(dev);
1892                                 handle->rix_hrxq = 0;
1893                         } else if (handle->fate_action ==
1894                                    MLX5_FLOW_FATE_QUEUE) {
1895                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1896                                 handle->rix_hrxq = 0;
1897                         }
1898                 }
1899                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1900                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1901         }
1902 }
1903
1904 /**
1905  * Remove the flow from the NIC and the memory.
1906  *
1907  * @param[in] dev
1908  *   Pointer to the Ethernet device structure.
1909  * @param[in, out] flow
1910  *   Pointer to flow structure.
1911  */
1912 static void
1913 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1914 {
1915         struct mlx5_priv *priv = dev->data->dev_private;
1916         struct mlx5_flow_handle *handle;
1917
1918         if (!flow)
1919                 return;
1920         flow_verbs_remove(dev, flow);
1921         while (flow->dev_handles) {
1922                 uint32_t tmp_idx = flow->dev_handles;
1923
1924                 handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1925                                    tmp_idx);
1926                 if (!handle)
1927                         return;
1928                 flow->dev_handles = handle->next.next;
1929                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1930                            tmp_idx);
1931         }
1932         if (flow->counter) {
1933                 flow_verbs_counter_release(dev, flow->counter);
1934                 flow->counter = 0;
1935         }
1936 }
1937
/**
 * Apply the flow to the NIC.
 *
 * Walks the temporary device flows built by the translate step, attaches
 * each one to a hash Rx queue (or the drop queue) and creates the verbs
 * flow rule. On failure, releases every queue/VLAN reference already taken
 * by the flow's handles.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_handle *handle;
	struct mlx5_flow *dev_flow;
	struct mlx5_hrxq *hrxq;
	uint32_t dev_handles;
	int err;
	int idx;

	/* Iterate newest-first over the temporary flows of this nesting
	 * level (from flow_idx - 1 down to flow_nested_idx). */
	for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
		dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
		handle = dev_flow->handle;
		if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
			hrxq = mlx5_drop_action_create(dev);
			if (!hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			uint32_t hrxq_idx;
			/* Index 0 or 1 selected by nesting, same slot the
			 * translate step filled. */
			struct mlx5_flow_rss_desc *rss_desc =
				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
				[!!priv->flow_nested_idx];

			MLX5_ASSERT(rss_desc->queue_num);
			/* Reuse a matching hash Rx queue if one exists,
			 * otherwise create a new one. */
			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
						 MLX5_RSS_HASH_KEY_LEN,
						 dev_flow->hash_fields,
						 rss_desc->queue,
						 rss_desc->queue_num);
			if (!hrxq_idx)
				hrxq_idx = mlx5_hrxq_new
						(dev, rss_desc->key,
						 MLX5_RSS_HASH_KEY_LEN,
						 dev_flow->hash_fields,
						 rss_desc->queue,
						 rss_desc->queue_num,
						 !!(handle->layers &
						 MLX5_FLOW_LAYER_TUNNEL));
			hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
					      hrxq_idx);
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					 "cannot get hash queue");
				goto error;
			}
			handle->rix_hrxq = hrxq_idx;
		}
		MLX5_ASSERT(hrxq);
		/* Program the actual flow rule into hardware. */
		handle->drv_flow = mlx5_glue->create_flow
					(hrxq->qp, &dev_flow->verbs.attr);
		if (!handle->drv_flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
		if (priv->vmwa_context &&
		    handle->vf_vlan.tag && !handle->vf_vlan.created) {
			/*
			 * The rule contains the VLAN pattern.
			 * For VF we are going to create VLAN
			 * interface to make hypervisor set correct
			 * e-Switch vport context.
			 */
			mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	/* Undo the queue/VLAN references already attached to the flow. */
	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       dev_handles, handle, next) {
		/* hrxq is union, don't touch it only the flag is set. */
		if (handle->rix_hrxq) {
			if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
				mlx5_drop_action_destroy(dev);
				handle->rix_hrxq = 0;
			} else if (handle->fate_action ==
				   MLX5_FLOW_FATE_QUEUE) {
				mlx5_hrxq_release(dev, handle->rix_hrxq);
				handle->rix_hrxq = 0;
			}
		}
		if (handle->vf_vlan.tag && handle->vf_vlan.created)
			mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
2050
2051 /**
2052  * Query a flow.
2053  *
2054  * @see rte_flow_query()
2055  * @see rte_flow_ops
2056  */
2057 static int
2058 flow_verbs_query(struct rte_eth_dev *dev,
2059                  struct rte_flow *flow,
2060                  const struct rte_flow_action *actions,
2061                  void *data,
2062                  struct rte_flow_error *error)
2063 {
2064         int ret = -EINVAL;
2065
2066         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2067                 switch (actions->type) {
2068                 case RTE_FLOW_ACTION_TYPE_VOID:
2069                         break;
2070                 case RTE_FLOW_ACTION_TYPE_COUNT:
2071                         ret = flow_verbs_counter_query(dev, flow, data, error);
2072                         break;
2073                 default:
2074                         return rte_flow_error_set(error, ENOTSUP,
2075                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2076                                                   actions,
2077                                                   "action not supported");
2078                 }
2079         }
2080         return ret;
2081 }
2082
/* Verbs flow engine entry points registered with the mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
	.validate = flow_verbs_validate,
	.prepare = flow_verbs_prepare,
	.translate = flow_verbs_translate,
	.apply = flow_verbs_apply,
	.remove = flow_verbs_remove,
	.destroy = flow_verbs_destroy,
	.query = flow_verbs_query,
};