net/mlx5: separate Rx function declarations to another file
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 #include <rte_common.h>
12 #include <rte_ether.h>
13 #include <ethdev_driver.h>
14 #include <rte_flow.h>
15 #include <rte_flow_driver.h>
16 #include <rte_malloc.h>
17 #include <rte_ip.h>
18
19 #include <mlx5_glue.h>
20 #include <mlx5_prm.h>
21 #include <mlx5_malloc.h>
22
23 #include "mlx5_defs.h"
24 #include "mlx5.h"
25 #include "mlx5_flow.h"
26 #include "mlx5_rxtx.h"
27 #include "mlx5_rx.h"
28
/*
 * Map a parsed item-flags word to the Verbs "inner" qualifier: when any
 * tunnel layer bit is set, specs describe the inner packet headers and
 * must carry IBV_FLOW_SPEC_INNER; otherwise no extra flag is needed.
 */
#define VERBS_SPEC_INNER(item_flags) \
	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
31
/*
 * Map of Verbs to Flow priority with 8 Verbs priorities.
 * Indexed by [rule base priority][sub-priority derived from matched items];
 * used when the device reports 8 supported Verbs priorities.
 */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};
36
/*
 * Map of Verbs to Flow priority with 16 Verbs priorities.
 * Indexed like priority_map_3 but with 5 base levels; used when the
 * device reports 16 supported Verbs priorities.
 */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};
42
/*
 * Verbs specification header.
 * Every Verbs flow spec starts with this layout (type then byte size);
 * it is used to walk the packed spec list appended after ibv_flow_attr.
 */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
48
49 /**
50  * Discover the maximum number of priority available.
51  *
52  * @param[in] dev
53  *   Pointer to the Ethernet device structure.
54  *
55  * @return
56  *   number of supported flow priority on success, a negative errno
57  *   value otherwise and rte_errno is set.
58  */
59 int
60 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
61 {
62         struct mlx5_priv *priv = dev->data->dev_private;
63         struct {
64                 struct ibv_flow_attr attr;
65                 struct ibv_flow_spec_eth eth;
66                 struct ibv_flow_spec_action_drop drop;
67         } flow_attr = {
68                 .attr = {
69                         .num_of_specs = 2,
70                         .port = (uint8_t)priv->dev_port,
71                 },
72                 .eth = {
73                         .type = IBV_FLOW_SPEC_ETH,
74                         .size = sizeof(struct ibv_flow_spec_eth),
75                 },
76                 .drop = {
77                         .size = sizeof(struct ibv_flow_spec_action_drop),
78                         .type = IBV_FLOW_SPEC_ACTION_DROP,
79                 },
80         };
81         struct ibv_flow *flow;
82         struct mlx5_hrxq *drop = priv->drop_queue.hrxq;
83         uint16_t vprio[] = { 8, 16 };
84         int i;
85         int priority = 0;
86
87         if (!drop->qp) {
88                 rte_errno = ENOTSUP;
89                 return -rte_errno;
90         }
91         for (i = 0; i != RTE_DIM(vprio); i++) {
92                 flow_attr.attr.priority = vprio[i] - 1;
93                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
94                 if (!flow)
95                         break;
96                 claim_zero(mlx5_glue->destroy_flow(flow));
97                 priority = vprio[i];
98         }
99         switch (priority) {
100         case 8:
101                 priority = RTE_DIM(priority_map_3);
102                 break;
103         case 16:
104                 priority = RTE_DIM(priority_map_5);
105                 break;
106         default:
107                 rte_errno = ENOTSUP;
108                 DRV_LOG(ERR,
109                         "port %u verbs maximum priority: %d expected 8/16",
110                         dev->data->port_id, priority);
111                 return -rte_errno;
112         }
113         DRV_LOG(INFO, "port %u supported flow priorities:"
114                 " 0-%d for ingress or egress root table,"
115                 " 0-%d for non-root table or transfer root table.",
116                 dev->data->port_id, priority - 2,
117                 MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
118         return priority;
119 }
120
121 /**
122  * Adjust flow priority based on the highest layer and the request priority.
123  *
124  * @param[in] dev
125  *   Pointer to the Ethernet device structure.
126  * @param[in] priority
127  *   The rule base priority.
128  * @param[in] subpriority
129  *   The priority based on the items.
130  *
131  * @return
132  *   The new priority.
133  */
134 uint32_t
135 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
136                                    uint32_t subpriority)
137 {
138         uint32_t res = 0;
139         struct mlx5_priv *priv = dev->data->dev_private;
140
141         switch (priv->config.flow_prio) {
142         case RTE_DIM(priority_map_3):
143                 res = priority_map_3[priority][subpriority];
144                 break;
145         case RTE_DIM(priority_map_5):
146                 res = priority_map_5[priority][subpriority];
147                 break;
148         }
149         return  res;
150 }
151
152 /**
153  * Get Verbs flow counter by index.
154  *
155  * @param[in] dev
156  *   Pointer to the Ethernet device structure.
157  * @param[in] idx
158  *   mlx5 flow counter index in the container.
159  * @param[out] ppool
160  *   mlx5 flow counter pool in the container,
161  *
162  * @return
163  *   A pointer to the counter, NULL otherwise.
164  */
165 static struct mlx5_flow_counter *
166 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
167                               uint32_t idx,
168                               struct mlx5_flow_counter_pool **ppool)
169 {
170         struct mlx5_priv *priv = dev->data->dev_private;
171         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
172         struct mlx5_flow_counter_pool *pool;
173
174         idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
175         pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
176         MLX5_ASSERT(pool);
177         if (ppool)
178                 *ppool = pool;
179         return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
180 }
181
/**
 * Create Verbs flow counter with Verbs library.
 *
 * Three compile-time variants: the MLNX OFED 4.2 counter-set API, the
 * upstream rdma-core 4.5+ counters API, or a stub when neither is
 * available at build time.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] counter
 *   mlx5 flow counter object, contains the counter id,
 *   handle of created Verbs flow counter is returned
 *   in cs field (if counters are supported).
 *
 * @return
 *   0 On success else a negative errno value is returned
 *   and rte_errno is set.
 */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* OFED 4.2 flavor: one counter set identified by the shared id. */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->shared_info.id};

	counter->dcs_when_free = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->dcs_when_free) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/*
	 * rdma-core 4.5+ flavor: create a counters object and attach two
	 * entries, packets at index 0 and bytes at index 1 (the query path
	 * reads them back in that order).
	 */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->dcs_when_free = mlx5_glue->create_counters(ctx, &init);
	if (!counter->dcs_when_free) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->dcs_when_free, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->dcs_when_free, &attach, NULL);
	}
	if (ret) {
		/* Either attach failed: destroy the half-built object. */
		claim_zero(mlx5_glue->destroy_counters(counter->dcs_when_free));
		counter->dcs_when_free = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	/* No counter support compiled in. */
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
248
/**
 * Get a flow counter.
 *
 * Reuses an existing shared counter when one is registered under @p id,
 * otherwise takes a free counter from the pools (growing the container
 * and allocating a new pool on demand) and creates its Verbs object.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   Index to the counter, 0 otherwise and rte_errno is set.
 */
static uint32_t
flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
	struct mlx5_flow_counter_pool *pool = NULL;
	struct mlx5_flow_counter *cnt = NULL;
	union mlx5_l3t_data data;
	uint32_t n_valid = cmng->n_valid;
	uint32_t pool_idx, cnt_idx;
	uint32_t i;
	int ret;

	/* A shared counter already registered under this id is reused as-is. */
	if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) &&
	    data.dword)
		return data.dword;
	/* Scan existing pools for a free counter. */
	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
		pool = cmng->pools[pool_idx];
		if (!pool)
			continue;
		cnt = TAILQ_FIRST(&pool->counters[0]);
		if (cnt)
			break;
	}
	if (!cnt) {
		/* No free counter anywhere: a new pool is needed. */
		struct mlx5_flow_counter_pool **pools;
		uint32_t size;

		if (n_valid == cmng->n) {
			/* Resize the container pool array. */
			size = sizeof(struct mlx5_flow_counter_pool *) *
				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
			pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
					    SOCKET_ID_ANY);
			if (!pools)
				return 0;
			if (n_valid) {
				/* Carry over the existing pool pointers. */
				memcpy(pools, cmng->pools,
				       sizeof(struct mlx5_flow_counter_pool *) *
				       n_valid);
				mlx5_free(cmng->pools);
			}
			cmng->pools = pools;
			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
		}
		/* Allocate memory for new pool*/
		size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
		if (!pool)
			return 0;
		/* Put every counter of the new pool on the free list. */
		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
			cnt = MLX5_POOL_GET_CNT(pool, i);
			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
		}
		cnt = MLX5_POOL_GET_CNT(pool, 0);
		cmng->pools[n_valid] = pool;
		pool_idx = n_valid;
		cmng->n_valid++;
	}
	/* Claim the counter and compute its external 1-based index. */
	TAILQ_REMOVE(&pool->counters[0], cnt, next);
	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
	cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
	if (shared) {
		/* Register the shared counter so later lookups find it. */
		data.dword = cnt_idx;
		if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data))
			return 0;
		cnt->shared_info.id = id;
		cnt_idx |= MLX5_CNT_SHARED_OFFSET;
	}
	/* Create counter with Verbs. */
	ret = flow_verbs_counter_create(dev, cnt);
	if (!ret) {
		cnt->dcs_when_active = cnt->dcs_when_free;
		cnt->hits = 0;
		cnt->bytes = 0;
		return cnt_idx;
	}
	/* Creation failed: return the counter to the free list. */
	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
	/* Some error occurred in Verbs library. */
	rte_errno = -ret;
	return 0;
}
344
/**
 * Release a flow counter.
 *
 * For shared counters the id-table entry is cleared first; if other
 * references remain the counter is kept alive. Otherwise the Verbs
 * object is destroyed and the counter returned to its pool free list.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] counter
 *   Index to the counter handler.
 */
static void
flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt;

	cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool);
	/* Non-zero clear result means the shared counter is still in use. */
	if (IS_SHARED_CNT(counter) &&
	    mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
		return;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	claim_zero(mlx5_glue->destroy_counter_set
			((struct ibv_counter_set *)cnt->dcs_when_active));
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	claim_zero(mlx5_glue->destroy_counters
				((struct ibv_counters *)cnt->dcs_when_active));
#endif
	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
}
373
374 /**
375  * Query a flow counter via Verbs library call.
376  *
377  * @see rte_flow_query()
378  * @see rte_flow_ops
379  */
380 static int
381 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
382                          struct rte_flow *flow, void *data,
383                          struct rte_flow_error *error)
384 {
385 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
386         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
387         if (flow->counter) {
388                 struct mlx5_flow_counter_pool *pool;
389                 struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
390                                                 (dev, flow->counter, &pool);
391                 struct rte_flow_query_count *qc = data;
392                 uint64_t counters[2] = {0, 0};
393 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
394                 struct ibv_query_counter_set_attr query_cs_attr = {
395                         .dcs_when_free = (struct ibv_counter_set *)
396                                                 cnt->dcs_when_active,
397                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
398                 };
399                 struct ibv_counter_set_data query_out = {
400                         .out = counters,
401                         .outlen = 2 * sizeof(uint64_t),
402                 };
403                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
404                                                        &query_out);
405 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
406                 int err = mlx5_glue->query_counters
407                         ((struct ibv_counters *)cnt->dcs_when_active, counters,
408                                 RTE_DIM(counters),
409                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
410 #endif
411                 if (err)
412                         return rte_flow_error_set
413                                 (error, err,
414                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
415                                  NULL,
416                                  "cannot read counter");
417                 qc->hits_set = 1;
418                 qc->bytes_set = 1;
419                 qc->hits = counters[0] - cnt->hits;
420                 qc->bytes = counters[1] - cnt->bytes;
421                 if (qc->reset) {
422                         cnt->hits = counters[0];
423                         cnt->bytes = counters[1];
424                 }
425                 return 0;
426         }
427         return rte_flow_error_set(error, EINVAL,
428                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
429                                   NULL,
430                                   "flow does not have counter");
431 #else
432         (void)flow;
433         (void)data;
434         return rte_flow_error_set(error, ENOTSUP,
435                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
436                                   NULL,
437                                   "counters are not available");
438 #endif
439 }
440
441 /**
442  * Add a verbs item specification into @p verbs.
443  *
444  * @param[out] verbs
445  *   Pointer to verbs structure.
446  * @param[in] src
447  *   Create specification.
448  * @param[in] size
449  *   Size in bytes of the specification to copy.
450  */
451 static void
452 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
453                     void *src, unsigned int size)
454 {
455         void *dst;
456
457         if (!verbs)
458                 return;
459         MLX5_ASSERT(verbs->specs);
460         dst = (void *)(verbs->specs + verbs->size);
461         memcpy(dst, src, size);
462         ++verbs->attr.num_of_specs;
463         verbs->size += size;
464 }
465
466 /**
467  * Convert the @p item into a Verbs specification. This function assumes that
468  * the input is valid and that there is space to insert the requested item
469  * into the flow.
470  *
471  * @param[in, out] dev_flow
472  *   Pointer to dev_flow structure.
473  * @param[in] item
474  *   Item specification.
475  * @param[in] item_flags
476  *   Parsed item flags.
477  */
478 static void
479 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
480                               const struct rte_flow_item *item,
481                               uint64_t item_flags)
482 {
483         const struct rte_flow_item_eth *spec = item->spec;
484         const struct rte_flow_item_eth *mask = item->mask;
485         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
486         struct ibv_flow_spec_eth eth = {
487                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
488                 .size = size,
489         };
490
491         if (!mask)
492                 mask = &rte_flow_item_eth_mask;
493         if (spec) {
494                 unsigned int i;
495
496                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
497                         RTE_ETHER_ADDR_LEN);
498                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
499                         RTE_ETHER_ADDR_LEN);
500                 eth.val.ether_type = spec->type;
501                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
502                         RTE_ETHER_ADDR_LEN);
503                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
504                         RTE_ETHER_ADDR_LEN);
505                 eth.mask.ether_type = mask->type;
506                 /* Remove unwanted bits from values. */
507                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
508                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
509                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
510                 }
511                 eth.val.ether_type &= eth.mask.ether_type;
512         }
513         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
514 }
515
516 /**
517  * Update the VLAN tag in the Verbs Ethernet specification.
518  * This function assumes that the input is valid and there is space to add
519  * the requested item.
520  *
521  * @param[in, out] attr
522  *   Pointer to Verbs attributes structure.
523  * @param[in] eth
524  *   Verbs structure containing the VLAN information to copy.
525  */
526 static void
527 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
528                             struct ibv_flow_spec_eth *eth)
529 {
530         unsigned int i;
531         const enum ibv_flow_spec_type search = eth->type;
532         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
533                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
534
535         for (i = 0; i != attr->num_of_specs; ++i) {
536                 if (hdr->type == search) {
537                         struct ibv_flow_spec_eth *e =
538                                 (struct ibv_flow_spec_eth *)hdr;
539
540                         e->val.vlan_tag = eth->val.vlan_tag;
541                         e->mask.vlan_tag = eth->mask.vlan_tag;
542                         e->val.ether_type = eth->val.ether_type;
543                         e->mask.ether_type = eth->mask.ether_type;
544                         break;
545                 }
546                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
547         }
548 }
549
550 /**
551  * Convert the @p item into a Verbs specification. This function assumes that
552  * the input is valid and that there is space to insert the requested item
553  * into the flow.
554  *
555  * @param[in, out] dev_flow
556  *   Pointer to dev_flow structure.
557  * @param[in] item
558  *   Item specification.
559  * @param[in] item_flags
560  *   Parsed item flags.
561  */
562 static void
563 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
564                                const struct rte_flow_item *item,
565                                uint64_t item_flags)
566 {
567         const struct rte_flow_item_vlan *spec = item->spec;
568         const struct rte_flow_item_vlan *mask = item->mask;
569         unsigned int size = sizeof(struct ibv_flow_spec_eth);
570         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
571         struct ibv_flow_spec_eth eth = {
572                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
573                 .size = size,
574         };
575         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
576                                       MLX5_FLOW_LAYER_OUTER_L2;
577
578         if (!mask)
579                 mask = &rte_flow_item_vlan_mask;
580         if (spec) {
581                 eth.val.vlan_tag = spec->tci;
582                 eth.mask.vlan_tag = mask->tci;
583                 eth.val.vlan_tag &= eth.mask.vlan_tag;
584                 eth.val.ether_type = spec->inner_type;
585                 eth.mask.ether_type = mask->inner_type;
586                 eth.val.ether_type &= eth.mask.ether_type;
587         }
588         if (!(item_flags & l2m))
589                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
590         else
591                 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
592         if (!tunnel)
593                 dev_flow->handle->vf_vlan.tag =
594                         rte_be_to_cpu_16(spec->tci) & 0x0fff;
595 }
596
597 /**
598  * Convert the @p item into a Verbs specification. This function assumes that
599  * the input is valid and that there is space to insert the requested item
600  * into the flow.
601  *
602  * @param[in, out] dev_flow
603  *   Pointer to dev_flow structure.
604  * @param[in] item
605  *   Item specification.
606  * @param[in] item_flags
607  *   Parsed item flags.
608  */
609 static void
610 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
611                                const struct rte_flow_item *item,
612                                uint64_t item_flags)
613 {
614         const struct rte_flow_item_ipv4 *spec = item->spec;
615         const struct rte_flow_item_ipv4 *mask = item->mask;
616         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
617         struct ibv_flow_spec_ipv4_ext ipv4 = {
618                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
619                 .size = size,
620         };
621
622         if (!mask)
623                 mask = &rte_flow_item_ipv4_mask;
624         if (spec) {
625                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
626                         .src_ip = spec->hdr.src_addr,
627                         .dst_ip = spec->hdr.dst_addr,
628                         .proto = spec->hdr.next_proto_id,
629                         .tos = spec->hdr.type_of_service,
630                 };
631                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
632                         .src_ip = mask->hdr.src_addr,
633                         .dst_ip = mask->hdr.dst_addr,
634                         .proto = mask->hdr.next_proto_id,
635                         .tos = mask->hdr.type_of_service,
636                 };
637                 /* Remove unwanted bits from values. */
638                 ipv4.val.src_ip &= ipv4.mask.src_ip;
639                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
640                 ipv4.val.proto &= ipv4.mask.proto;
641                 ipv4.val.tos &= ipv4.mask.tos;
642         }
643         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
644 }
645
646 /**
647  * Convert the @p item into a Verbs specification. This function assumes that
648  * the input is valid and that there is space to insert the requested item
649  * into the flow.
650  *
651  * @param[in, out] dev_flow
652  *   Pointer to dev_flow structure.
653  * @param[in] item
654  *   Item specification.
655  * @param[in] item_flags
656  *   Parsed item flags.
657  */
658 static void
659 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
660                                const struct rte_flow_item *item,
661                                uint64_t item_flags)
662 {
663         const struct rte_flow_item_ipv6 *spec = item->spec;
664         const struct rte_flow_item_ipv6 *mask = item->mask;
665         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
666         struct ibv_flow_spec_ipv6 ipv6 = {
667                 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
668                 .size = size,
669         };
670
671         if (!mask)
672                 mask = &rte_flow_item_ipv6_mask;
673         if (spec) {
674                 unsigned int i;
675                 uint32_t vtc_flow_val;
676                 uint32_t vtc_flow_mask;
677
678                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
679                        RTE_DIM(ipv6.val.src_ip));
680                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
681                        RTE_DIM(ipv6.val.dst_ip));
682                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
683                        RTE_DIM(ipv6.mask.src_ip));
684                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
685                        RTE_DIM(ipv6.mask.dst_ip));
686                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
687                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
688                 ipv6.val.flow_label =
689                         rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
690                                          RTE_IPV6_HDR_FL_SHIFT);
691                 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
692                                          RTE_IPV6_HDR_TC_SHIFT;
693                 ipv6.val.next_hdr = spec->hdr.proto;
694                 ipv6.mask.flow_label =
695                         rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
696                                          RTE_IPV6_HDR_FL_SHIFT);
697                 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
698                                           RTE_IPV6_HDR_TC_SHIFT;
699                 ipv6.mask.next_hdr = mask->hdr.proto;
700                 /* Remove unwanted bits from values. */
701                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
702                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
703                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
704                 }
705                 ipv6.val.flow_label &= ipv6.mask.flow_label;
706                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
707                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
708         }
709         flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
710 }
711
712 /**
713  * Convert the @p item into a Verbs specification. This function assumes that
714  * the input is valid and that there is space to insert the requested item
715  * into the flow.
716  *
717  * @param[in, out] dev_flow
718  *   Pointer to dev_flow structure.
719  * @param[in] item
720  *   Item specification.
721  * @param[in] item_flags
722  *   Parsed item flags.
723  */
724 static void
725 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
726                               const struct rte_flow_item *item,
727                               uint64_t item_flags __rte_unused)
728 {
729         const struct rte_flow_item_tcp *spec = item->spec;
730         const struct rte_flow_item_tcp *mask = item->mask;
731         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
732         struct ibv_flow_spec_tcp_udp tcp = {
733                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
734                 .size = size,
735         };
736
737         if (!mask)
738                 mask = &rte_flow_item_tcp_mask;
739         if (spec) {
740                 tcp.val.dst_port = spec->hdr.dst_port;
741                 tcp.val.src_port = spec->hdr.src_port;
742                 tcp.mask.dst_port = mask->hdr.dst_port;
743                 tcp.mask.src_port = mask->hdr.src_port;
744                 /* Remove unwanted bits from values. */
745                 tcp.val.src_port &= tcp.mask.src_port;
746                 tcp.val.dst_port &= tcp.mask.dst_port;
747         }
748         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
749 }
750
751 /**
752  * Convert the @p item into a Verbs specification. This function assumes that
753  * the input is valid and that there is space to insert the requested item
754  * into the flow.
755  *
756  * @param[in, out] dev_flow
757  *   Pointer to dev_flow structure.
758  * @param[in] item
759  *   Item specification.
760  * @param[in] item_flags
761  *   Parsed item flags.
762  */
763 static void
764 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
765                               const struct rte_flow_item *item,
766                               uint64_t item_flags __rte_unused)
767 {
768         const struct rte_flow_item_udp *spec = item->spec;
769         const struct rte_flow_item_udp *mask = item->mask;
770         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
771         struct ibv_flow_spec_tcp_udp udp = {
772                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
773                 .size = size,
774         };
775
776         if (!mask)
777                 mask = &rte_flow_item_udp_mask;
778         if (spec) {
779                 udp.val.dst_port = spec->hdr.dst_port;
780                 udp.val.src_port = spec->hdr.src_port;
781                 udp.mask.dst_port = mask->hdr.dst_port;
782                 udp.mask.src_port = mask->hdr.src_port;
783                 /* Remove unwanted bits from values. */
784                 udp.val.src_port &= udp.mask.src_port;
785                 udp.val.dst_port &= udp.mask.dst_port;
786         }
787         item++;
788         while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
789                 item++;
790         if (!(udp.val.dst_port & udp.mask.dst_port)) {
791                 switch ((item)->type) {
792                 case RTE_FLOW_ITEM_TYPE_VXLAN:
793                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
794                         udp.mask.dst_port = 0xffff;
795                         break;
796                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
797                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
798                         udp.mask.dst_port = 0xffff;
799                         break;
800                 case RTE_FLOW_ITEM_TYPE_MPLS:
801                         udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
802                         udp.mask.dst_port = 0xffff;
803                         break;
804                 default:
805                         break;
806                 }
807         }
808
809         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
810 }
811
812 /**
813  * Convert the @p item into a Verbs specification. This function assumes that
814  * the input is valid and that there is space to insert the requested item
815  * into the flow.
816  *
817  * @param[in, out] dev_flow
818  *   Pointer to dev_flow structure.
819  * @param[in] item
820  *   Item specification.
821  * @param[in] item_flags
822  *   Parsed item flags.
823  */
824 static void
825 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
826                                 const struct rte_flow_item *item,
827                                 uint64_t item_flags __rte_unused)
828 {
829         const struct rte_flow_item_vxlan *spec = item->spec;
830         const struct rte_flow_item_vxlan *mask = item->mask;
831         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
832         struct ibv_flow_spec_tunnel vxlan = {
833                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
834                 .size = size,
835         };
836         union vni {
837                 uint32_t vlan_id;
838                 uint8_t vni[4];
839         } id = { .vlan_id = 0, };
840
841         if (!mask)
842                 mask = &rte_flow_item_vxlan_mask;
843         if (spec) {
844                 memcpy(&id.vni[1], spec->vni, 3);
845                 vxlan.val.tunnel_id = id.vlan_id;
846                 memcpy(&id.vni[1], mask->vni, 3);
847                 vxlan.mask.tunnel_id = id.vlan_id;
848                 /* Remove unwanted bits from values. */
849                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
850         }
851         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
852 }
853
854 /**
855  * Convert the @p item into a Verbs specification. This function assumes that
856  * the input is valid and that there is space to insert the requested item
857  * into the flow.
858  *
859  * @param[in, out] dev_flow
860  *   Pointer to dev_flow structure.
861  * @param[in] item
862  *   Item specification.
863  * @param[in] item_flags
864  *   Parsed item flags.
865  */
866 static void
867 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
868                                     const struct rte_flow_item *item,
869                                     uint64_t item_flags __rte_unused)
870 {
871         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
872         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
873         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
874         struct ibv_flow_spec_tunnel vxlan_gpe = {
875                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
876                 .size = size,
877         };
878         union vni {
879                 uint32_t vlan_id;
880                 uint8_t vni[4];
881         } id = { .vlan_id = 0, };
882
883         if (!mask)
884                 mask = &rte_flow_item_vxlan_gpe_mask;
885         if (spec) {
886                 memcpy(&id.vni[1], spec->vni, 3);
887                 vxlan_gpe.val.tunnel_id = id.vlan_id;
888                 memcpy(&id.vni[1], mask->vni, 3);
889                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
890                 /* Remove unwanted bits from values. */
891                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
892         }
893         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
894 }
895
896 /**
897  * Update the protocol in Verbs IPv4/IPv6 spec.
898  *
899  * @param[in, out] attr
900  *   Pointer to Verbs attributes structure.
901  * @param[in] search
902  *   Specification type to search in order to update the IP protocol.
903  * @param[in] protocol
904  *   Protocol value to set if none is present in the specification.
905  */
906 static void
907 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
908                                        enum ibv_flow_spec_type search,
909                                        uint8_t protocol)
910 {
911         unsigned int i;
912         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
913                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
914
915         if (!attr)
916                 return;
917         for (i = 0; i != attr->num_of_specs; ++i) {
918                 if (hdr->type == search) {
919                         union {
920                                 struct ibv_flow_spec_ipv4_ext *ipv4;
921                                 struct ibv_flow_spec_ipv6 *ipv6;
922                         } ip;
923
924                         switch (search) {
925                         case IBV_FLOW_SPEC_IPV4_EXT:
926                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
927                                 if (!ip.ipv4->val.proto) {
928                                         ip.ipv4->val.proto = protocol;
929                                         ip.ipv4->mask.proto = 0xff;
930                                 }
931                                 break;
932                         case IBV_FLOW_SPEC_IPV6:
933                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
934                                 if (!ip.ipv6->val.next_hdr) {
935                                         ip.ipv6->val.next_hdr = protocol;
936                                         ip.ipv6->mask.next_hdr = 0xff;
937                                 }
938                                 break;
939                         default:
940                                 break;
941                         }
942                         break;
943                 }
944                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
945         }
946 }
947
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification (only read when rdma-core has MPLS/GRE spec support,
 *   hence the __rte_unused).
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
			      const struct rte_flow_item *item __rte_unused,
			      uint64_t item_flags)
{
	struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	/*
	 * Without MPLS support there is no IBV_FLOW_SPEC_GRE either; fall
	 * back to a generic tunnel spec, GRE is then matched only through
	 * the outer IP protocol patched below.
	 */
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		/*
		 * val.key/mask.key are never taken from the item here, so
		 * both sides are still zero from the initializer and this
		 * is effectively a no-op (GRE key matching not populated).
		 */
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/*
	 * GRE runs over IP with IPPROTO_GRE; if the already-translated
	 * outer IPv4/IPv6 spec left its protocol field unset, fill it in
	 * so the tunnel is actually matched.
	 */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(verbs, &tunnel, size);
}
1004
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * When rdma-core lacks MPLS support the whole body is compiled out and the
 * function is a no-op (all parameters are then unused).
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
			       const struct rte_flow_item *item __rte_unused,
			       uint64_t item_flags __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	const struct rte_flow_item_mpls *spec = item->spec;
	const struct rte_flow_item_mpls *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
	struct ibv_flow_spec_mpls mpls = {
		.type = IBV_FLOW_SPEC_MPLS,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_mpls_mask;
	if (spec) {
		/* Copy the label word straight from the item layout. */
		memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
		memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
		/* Remove unwanted bits from values.  */
		mpls.val.label &= mpls.mask.label;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
#endif
}
1042
1043 /**
1044  * Convert the @p action into a Verbs specification. This function assumes that
1045  * the input is valid and that there is space to insert the requested action
1046  * into the flow.
1047  *
1048  * @param[in] dev_flow
1049  *   Pointer to mlx5_flow.
1050  * @param[in] action
1051  *   Action configuration.
1052  */
1053 static void
1054 flow_verbs_translate_action_drop
1055         (struct mlx5_flow *dev_flow,
1056          const struct rte_flow_action *action __rte_unused)
1057 {
1058         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1059         struct ibv_flow_spec_action_drop drop = {
1060                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1061                         .size = size,
1062         };
1063
1064         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
1065 }
1066
1067 /**
1068  * Convert the @p action into a Verbs specification. This function assumes that
1069  * the input is valid and that there is space to insert the requested action
1070  * into the flow.
1071  *
1072  * @param[in] rss_desc
1073  *   Pointer to mlx5_flow_rss_desc.
1074  * @param[in] action
1075  *   Action configuration.
1076  */
1077 static void
1078 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
1079                                   const struct rte_flow_action *action)
1080 {
1081         const struct rte_flow_action_queue *queue = action->conf;
1082
1083         rss_desc->queue[0] = queue->index;
1084         rss_desc->queue_num = 1;
1085 }
1086
1087 /**
1088  * Convert the @p action into a Verbs specification. This function assumes that
1089  * the input is valid and that there is space to insert the requested action
1090  * into the flow.
1091  *
1092  * @param[in] rss_desc
1093  *   Pointer to mlx5_flow_rss_desc.
1094  * @param[in] action
1095  *   Action configuration.
1096  */
1097 static void
1098 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
1099                                 const struct rte_flow_action *action)
1100 {
1101         const struct rte_flow_action_rss *rss = action->conf;
1102         const uint8_t *rss_key;
1103
1104         memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1105         rss_desc->queue_num = rss->queue_num;
1106         /* NULL RSS key indicates default RSS key. */
1107         rss_key = !rss->key ? rss_hash_default_key : rss->key;
1108         memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1109         /*
1110          * rss->level and rss.types should be set in advance when expanding
1111          * items for RSS.
1112          */
1113 }
1114
1115 /**
1116  * Convert the @p action into a Verbs specification. This function assumes that
1117  * the input is valid and that there is space to insert the requested action
1118  * into the flow.
1119  *
1120  * @param[in] dev_flow
1121  *   Pointer to mlx5_flow.
1122  * @param[in] action
1123  *   Action configuration.
1124  */
1125 static void
1126 flow_verbs_translate_action_flag
1127         (struct mlx5_flow *dev_flow,
1128          const struct rte_flow_action *action __rte_unused)
1129 {
1130         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1131         struct ibv_flow_spec_action_tag tag = {
1132                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1133                 .size = size,
1134                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1135         };
1136
1137         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1138 }
1139
1140 /**
1141  * Convert the @p action into a Verbs specification. This function assumes that
1142  * the input is valid and that there is space to insert the requested action
1143  * into the flow.
1144  *
1145  * @param[in] dev_flow
1146  *   Pointer to mlx5_flow.
1147  * @param[in] action
1148  *   Action configuration.
1149  */
1150 static void
1151 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1152                                  const struct rte_flow_action *action)
1153 {
1154         const struct rte_flow_action_mark *mark = action->conf;
1155         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1156         struct ibv_flow_spec_action_tag tag = {
1157                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1158                 .size = size,
1159                 .tag_id = mlx5_flow_mark_set(mark->id),
1160         };
1161
1162         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1163 }
1164
/**
 * Convert the @p action into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested action
 * into the flow.
 *
 * @param[in, out] dev_flow
 *   Pointer to mlx5_flow.
 * @param[in] action
 *   Action configuration.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
				  const struct rte_flow_action *action,
				  struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt = NULL;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	/* Allocate the flow counter on first reference. */
	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
	/*
	 * The Verbs counter spec is only emitted when rdma-core provides one
	 * of the two counter APIs; the attachment handle differs between the
	 * v42 (counter_set) and v45 (counters) variants.
	 */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	counter.counter_set_handle =
		((struct ibv_counter_set *)cnt->dcs_when_active)->handle;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	counter.counters = (struct ibv_counters *)cnt->dcs_when_active;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
	return 0;
}
1223
1224 /**
1225  * Internal validation function. For validating both actions and items.
1226  *
1227  * @param[in] dev
1228  *   Pointer to the Ethernet device structure.
1229  * @param[in] attr
1230  *   Pointer to the flow attributes.
1231  * @param[in] items
1232  *   Pointer to the list of items.
1233  * @param[in] actions
1234  *   Pointer to the list of actions.
1235  * @param[in] external
1236  *   This flow rule is created by request external to PMD.
1237  * @param[in] hairpin
1238  *   Number of hairpin TX actions, 0 means classic flow.
1239  * @param[out] error
1240  *   Pointer to the error structure.
1241  *
1242  * @return
1243  *   0 on success, a negative errno value otherwise and rte_errno is set.
1244  */
1245 static int
1246 flow_verbs_validate(struct rte_eth_dev *dev,
1247                     const struct rte_flow_attr *attr,
1248                     const struct rte_flow_item items[],
1249                     const struct rte_flow_action actions[],
1250                     bool external __rte_unused,
1251                     int hairpin __rte_unused,
1252                     struct rte_flow_error *error)
1253 {
1254         int ret;
1255         uint64_t action_flags = 0;
1256         uint64_t item_flags = 0;
1257         uint64_t last_item = 0;
1258         uint8_t next_protocol = 0xff;
1259         uint16_t ether_type = 0;
1260         bool is_empty_vlan = false;
1261
1262         if (items == NULL)
1263                 return -1;
1264         ret = mlx5_flow_validate_attributes(dev, attr, error);
1265         if (ret < 0)
1266                 return ret;
1267         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1268                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1269                 int ret = 0;
1270
1271                 switch (items->type) {
1272                 case RTE_FLOW_ITEM_TYPE_VOID:
1273                         break;
1274                 case RTE_FLOW_ITEM_TYPE_ETH:
1275                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1276                                                           false, error);
1277                         if (ret < 0)
1278                                 return ret;
1279                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1280                                              MLX5_FLOW_LAYER_OUTER_L2;
1281                         if (items->mask != NULL && items->spec != NULL) {
1282                                 ether_type =
1283                                         ((const struct rte_flow_item_eth *)
1284                                          items->spec)->type;
1285                                 ether_type &=
1286                                         ((const struct rte_flow_item_eth *)
1287                                          items->mask)->type;
1288                                 if (ether_type == RTE_BE16(RTE_ETHER_TYPE_VLAN))
1289                                         is_empty_vlan = true;
1290                                 ether_type = rte_be_to_cpu_16(ether_type);
1291                         } else {
1292                                 ether_type = 0;
1293                         }
1294                         break;
1295                 case RTE_FLOW_ITEM_TYPE_VLAN:
1296                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1297                                                            dev, error);
1298                         if (ret < 0)
1299                                 return ret;
1300                         last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1301                                               MLX5_FLOW_LAYER_INNER_VLAN) :
1302                                              (MLX5_FLOW_LAYER_OUTER_L2 |
1303                                               MLX5_FLOW_LAYER_OUTER_VLAN);
1304                         if (items->mask != NULL && items->spec != NULL) {
1305                                 ether_type =
1306                                         ((const struct rte_flow_item_vlan *)
1307                                          items->spec)->inner_type;
1308                                 ether_type &=
1309                                         ((const struct rte_flow_item_vlan *)
1310                                          items->mask)->inner_type;
1311                                 ether_type = rte_be_to_cpu_16(ether_type);
1312                         } else {
1313                                 ether_type = 0;
1314                         }
1315                         is_empty_vlan = false;
1316                         break;
1317                 case RTE_FLOW_ITEM_TYPE_IPV4:
1318                         ret = mlx5_flow_validate_item_ipv4
1319                                                 (items, item_flags,
1320                                                  last_item, ether_type, NULL,
1321                                                  MLX5_ITEM_RANGE_NOT_ACCEPTED,
1322                                                  error);
1323                         if (ret < 0)
1324                                 return ret;
1325                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1326                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1327                         if (items->mask != NULL &&
1328                             ((const struct rte_flow_item_ipv4 *)
1329                              items->mask)->hdr.next_proto_id) {
1330                                 next_protocol =
1331                                         ((const struct rte_flow_item_ipv4 *)
1332                                          (items->spec))->hdr.next_proto_id;
1333                                 next_protocol &=
1334                                         ((const struct rte_flow_item_ipv4 *)
1335                                          (items->mask))->hdr.next_proto_id;
1336                         } else {
1337                                 /* Reset for inner layer. */
1338                                 next_protocol = 0xff;
1339                         }
1340                         break;
1341                 case RTE_FLOW_ITEM_TYPE_IPV6:
1342                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1343                                                            last_item,
1344                                                            ether_type, NULL,
1345                                                            error);
1346                         if (ret < 0)
1347                                 return ret;
1348                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1349                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1350                         if (items->mask != NULL &&
1351                             ((const struct rte_flow_item_ipv6 *)
1352                              items->mask)->hdr.proto) {
1353                                 next_protocol =
1354                                         ((const struct rte_flow_item_ipv6 *)
1355                                          items->spec)->hdr.proto;
1356                                 next_protocol &=
1357                                         ((const struct rte_flow_item_ipv6 *)
1358                                          items->mask)->hdr.proto;
1359                         } else {
1360                                 /* Reset for inner layer. */
1361                                 next_protocol = 0xff;
1362                         }
1363                         break;
1364                 case RTE_FLOW_ITEM_TYPE_UDP:
1365                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1366                                                           next_protocol,
1367                                                           error);
1368                         if (ret < 0)
1369                                 return ret;
1370                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1371                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
1372                         break;
1373                 case RTE_FLOW_ITEM_TYPE_TCP:
1374                         ret = mlx5_flow_validate_item_tcp
1375                                                 (items, item_flags,
1376                                                  next_protocol,
1377                                                  &rte_flow_item_tcp_mask,
1378                                                  error);
1379                         if (ret < 0)
1380                                 return ret;
1381                         last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1382                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
1383                         break;
1384                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1385                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1386                                                             error);
1387                         if (ret < 0)
1388                                 return ret;
1389                         last_item = MLX5_FLOW_LAYER_VXLAN;
1390                         break;
1391                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1392                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
1393                                                                 item_flags,
1394                                                                 dev, error);
1395                         if (ret < 0)
1396                                 return ret;
1397                         last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1398                         break;
1399                 case RTE_FLOW_ITEM_TYPE_GRE:
1400                         ret = mlx5_flow_validate_item_gre(items, item_flags,
1401                                                           next_protocol, error);
1402                         if (ret < 0)
1403                                 return ret;
1404                         last_item = MLX5_FLOW_LAYER_GRE;
1405                         break;
1406                 case RTE_FLOW_ITEM_TYPE_MPLS:
1407                         ret = mlx5_flow_validate_item_mpls(dev, items,
1408                                                            item_flags,
1409                                                            last_item, error);
1410                         if (ret < 0)
1411                                 return ret;
1412                         last_item = MLX5_FLOW_LAYER_MPLS;
1413                         break;
1414                 case RTE_FLOW_ITEM_TYPE_ICMP:
1415                 case RTE_FLOW_ITEM_TYPE_ICMP6:
1416                         return rte_flow_error_set(error, ENOTSUP,
1417                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1418                                                   NULL, "ICMP/ICMP6 "
1419                                                   "item not supported");
1420                 default:
1421                         return rte_flow_error_set(error, ENOTSUP,
1422                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1423                                                   NULL, "item not supported");
1424                 }
1425                 item_flags |= last_item;
1426         }
1427         if (is_empty_vlan)
1428                 return rte_flow_error_set(error, ENOTSUP,
1429                                                  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
1430                     "VLAN matching without vid specification is not supported");
1431         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1432                 switch (actions->type) {
1433                 case RTE_FLOW_ACTION_TYPE_VOID:
1434                         break;
1435                 case RTE_FLOW_ACTION_TYPE_FLAG:
1436                         ret = mlx5_flow_validate_action_flag(action_flags,
1437                                                              attr,
1438                                                              error);
1439                         if (ret < 0)
1440                                 return ret;
1441                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1442                         break;
1443                 case RTE_FLOW_ACTION_TYPE_MARK:
1444                         ret = mlx5_flow_validate_action_mark(actions,
1445                                                              action_flags,
1446                                                              attr,
1447                                                              error);
1448                         if (ret < 0)
1449                                 return ret;
1450                         action_flags |= MLX5_FLOW_ACTION_MARK;
1451                         break;
1452                 case RTE_FLOW_ACTION_TYPE_DROP:
1453                         ret = mlx5_flow_validate_action_drop(action_flags,
1454                                                              attr,
1455                                                              error);
1456                         if (ret < 0)
1457                                 return ret;
1458                         action_flags |= MLX5_FLOW_ACTION_DROP;
1459                         break;
1460                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1461                         ret = mlx5_flow_validate_action_queue(actions,
1462                                                               action_flags, dev,
1463                                                               attr,
1464                                                               error);
1465                         if (ret < 0)
1466                                 return ret;
1467                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1468                         break;
1469                 case RTE_FLOW_ACTION_TYPE_RSS:
1470                         ret = mlx5_flow_validate_action_rss(actions,
1471                                                             action_flags, dev,
1472                                                             attr, item_flags,
1473                                                             error);
1474                         if (ret < 0)
1475                                 return ret;
1476                         action_flags |= MLX5_FLOW_ACTION_RSS;
1477                         break;
1478                 case RTE_FLOW_ACTION_TYPE_COUNT:
1479                         ret = mlx5_flow_validate_action_count(dev, attr, error);
1480                         if (ret < 0)
1481                                 return ret;
1482                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1483                         break;
1484                 default:
1485                         return rte_flow_error_set(error, ENOTSUP,
1486                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1487                                                   actions,
1488                                                   "action not supported");
1489                 }
1490         }
1491         /*
1492          * Validate the drop action mutual exclusion with other actions.
1493          * Drop action is mutually-exclusive with any other action, except for
1494          * Count action.
1495          */
1496         if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1497             (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1498                 return rte_flow_error_set(error, EINVAL,
1499                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1500                                           "Drop action is mutually-exclusive "
1501                                           "with any other action, except for "
1502                                           "Count action");
1503         if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1504                 return rte_flow_error_set(error, EINVAL,
1505                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1506                                           "no fate action is found");
1507         return 0;
1508 }
1509
1510 /**
1511  * Calculate the required bytes that are needed for the action part of the verbs
1512  * flow.
1513  *
1514  * @param[in] actions
1515  *   Pointer to the list of actions.
1516  *
1517  * @return
1518  *   The size of the memory needed for all actions.
1519  */
1520 static int
1521 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1522 {
1523         int size = 0;
1524
1525         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1526                 switch (actions->type) {
1527                 case RTE_FLOW_ACTION_TYPE_VOID:
1528                         break;
1529                 case RTE_FLOW_ACTION_TYPE_FLAG:
1530                         size += sizeof(struct ibv_flow_spec_action_tag);
1531                         break;
1532                 case RTE_FLOW_ACTION_TYPE_MARK:
1533                         size += sizeof(struct ibv_flow_spec_action_tag);
1534                         break;
1535                 case RTE_FLOW_ACTION_TYPE_DROP:
1536                         size += sizeof(struct ibv_flow_spec_action_drop);
1537                         break;
1538                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1539                         break;
1540                 case RTE_FLOW_ACTION_TYPE_RSS:
1541                         break;
1542                 case RTE_FLOW_ACTION_TYPE_COUNT:
1543 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1544         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1545                         size += sizeof(struct ibv_flow_spec_counter_action);
1546 #endif
1547                         break;
1548                 default:
1549                         break;
1550                 }
1551         }
1552         return size;
1553 }
1554
1555 /**
1556  * Calculate the required bytes that are needed for the item part of the verbs
1557  * flow.
1558  *
1559  * @param[in] items
1560  *   Pointer to the list of items.
1561  *
1562  * @return
1563  *   The size of the memory needed for all items.
1564  */
1565 static int
1566 flow_verbs_get_items_size(const struct rte_flow_item items[])
1567 {
1568         int size = 0;
1569
1570         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1571                 switch (items->type) {
1572                 case RTE_FLOW_ITEM_TYPE_VOID:
1573                         break;
1574                 case RTE_FLOW_ITEM_TYPE_ETH:
1575                         size += sizeof(struct ibv_flow_spec_eth);
1576                         break;
1577                 case RTE_FLOW_ITEM_TYPE_VLAN:
1578                         size += sizeof(struct ibv_flow_spec_eth);
1579                         break;
1580                 case RTE_FLOW_ITEM_TYPE_IPV4:
1581                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1582                         break;
1583                 case RTE_FLOW_ITEM_TYPE_IPV6:
1584                         size += sizeof(struct ibv_flow_spec_ipv6);
1585                         break;
1586                 case RTE_FLOW_ITEM_TYPE_UDP:
1587                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1588                         break;
1589                 case RTE_FLOW_ITEM_TYPE_TCP:
1590                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1591                         break;
1592                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1593                         size += sizeof(struct ibv_flow_spec_tunnel);
1594                         break;
1595                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1596                         size += sizeof(struct ibv_flow_spec_tunnel);
1597                         break;
1598 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1599                 case RTE_FLOW_ITEM_TYPE_GRE:
1600                         size += sizeof(struct ibv_flow_spec_gre);
1601                         break;
1602                 case RTE_FLOW_ITEM_TYPE_MPLS:
1603                         size += sizeof(struct ibv_flow_spec_mpls);
1604                         break;
1605 #else
1606                 case RTE_FLOW_ITEM_TYPE_GRE:
1607                         size += sizeof(struct ibv_flow_spec_tunnel);
1608                         break;
1609 #endif
1610                 default:
1611                         break;
1612                 }
1613         }
1614         return size;
1615 }
1616
1617 /**
1618  * Internal preparation function. Allocate mlx5_flow with the required size.
1619  * The required size is calculate based on the actions and items. This function
1620  * also returns the detected actions and items for later use.
1621  *
1622  * @param[in] dev
1623  *   Pointer to Ethernet device.
1624  * @param[in] attr
1625  *   Pointer to the flow attributes.
1626  * @param[in] items
1627  *   Pointer to the list of items.
1628  * @param[in] actions
1629  *   Pointer to the list of actions.
1630  * @param[out] error
1631  *   Pointer to the error structure.
1632  *
1633  * @return
1634  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1635  *   is set.
1636  */
1637 static struct mlx5_flow *
1638 flow_verbs_prepare(struct rte_eth_dev *dev,
1639                    const struct rte_flow_attr *attr __rte_unused,
1640                    const struct rte_flow_item items[],
1641                    const struct rte_flow_action actions[],
1642                    struct rte_flow_error *error)
1643 {
1644         size_t size = 0;
1645         uint32_t handle_idx = 0;
1646         struct mlx5_flow *dev_flow;
1647         struct mlx5_flow_handle *dev_handle;
1648         struct mlx5_priv *priv = dev->data->dev_private;
1649         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1650
1651         MLX5_ASSERT(wks);
1652         size += flow_verbs_get_actions_size(actions);
1653         size += flow_verbs_get_items_size(items);
1654         if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1655                 rte_flow_error_set(error, E2BIG,
1656                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1657                                    "Verbs spec/action size too large");
1658                 return NULL;
1659         }
1660         /* In case of corrupting the memory. */
1661         if (wks->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1662                 rte_flow_error_set(error, ENOSPC,
1663                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1664                                    "not free temporary device flow");
1665                 return NULL;
1666         }
1667         dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1668                                    &handle_idx);
1669         if (!dev_handle) {
1670                 rte_flow_error_set(error, ENOMEM,
1671                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1672                                    "not enough memory to create flow handle");
1673                 return NULL;
1674         }
1675         MLX5_ASSERT(wks->flow_idx + 1 < RTE_DIM(wks->flows));
1676         dev_flow = &wks->flows[wks->flow_idx++];
1677         dev_flow->handle = dev_handle;
1678         dev_flow->handle_idx = handle_idx;
1679         /* Memcpy is used, only size needs to be cleared to 0. */
1680         dev_flow->verbs.size = 0;
1681         dev_flow->verbs.attr.num_of_specs = 0;
1682         dev_flow->ingress = attr->ingress;
1683         dev_flow->hash_fields = 0;
1684         /* Need to set transfer attribute: not supported in Verbs mode. */
1685         return dev_flow;
1686 }
1687
1688 /**
1689  * Fill the flow with verb spec.
1690  *
1691  * @param[in] dev
1692  *   Pointer to Ethernet device.
1693  * @param[in, out] dev_flow
1694  *   Pointer to the mlx5 flow.
1695  * @param[in] attr
1696  *   Pointer to the flow attributes.
1697  * @param[in] items
1698  *   Pointer to the list of items.
1699  * @param[in] actions
1700  *   Pointer to the list of actions.
1701  * @param[out] error
1702  *   Pointer to the error structure.
1703  *
1704  * @return
1705  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1706  */
static int
flow_verbs_translate(struct rte_eth_dev *dev,
                     struct mlx5_flow *dev_flow,
                     const struct rte_flow_attr *attr,
                     const struct rte_flow_item items[],
                     const struct rte_flow_action actions[],
                     struct rte_flow_error *error)
{
        uint64_t item_flags = 0;
        uint64_t action_flags = 0;
        uint64_t priority = attr->priority;
        uint32_t subpriority = 0;
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
        struct mlx5_flow_rss_desc *rss_desc;

        MLX5_ASSERT(wks);
        rss_desc = &wks->rss_desc;
        /* "Lowest priority" indicator maps to the last configured level. */
        if (priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
                priority = priv->config.flow_prio - 1;
        /* First pass: translate each action into its verbs counterpart. */
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
                int ret;

                switch (actions->type) {
                case RTE_FLOW_ACTION_TYPE_VOID:
                        break;
                case RTE_FLOW_ACTION_TYPE_FLAG:
                        flow_verbs_translate_action_flag(dev_flow, actions);
                        action_flags |= MLX5_FLOW_ACTION_FLAG;
                        dev_flow->handle->mark = 1;
                        break;
                case RTE_FLOW_ACTION_TYPE_MARK:
                        flow_verbs_translate_action_mark(dev_flow, actions);
                        action_flags |= MLX5_FLOW_ACTION_MARK;
                        dev_flow->handle->mark = 1;
                        break;
                case RTE_FLOW_ACTION_TYPE_DROP:
                        flow_verbs_translate_action_drop(dev_flow, actions);
                        action_flags |= MLX5_FLOW_ACTION_DROP;
                        dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
                        break;
                case RTE_FLOW_ACTION_TYPE_QUEUE:
                        /* QUEUE/RSS fill the RSS descriptor, not a spec. */
                        flow_verbs_translate_action_queue(rss_desc, actions);
                        action_flags |= MLX5_FLOW_ACTION_QUEUE;
                        dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
                        break;
                case RTE_FLOW_ACTION_TYPE_RSS:
                        flow_verbs_translate_action_rss(rss_desc, actions);
                        action_flags |= MLX5_FLOW_ACTION_RSS;
                        dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
                        break;
                case RTE_FLOW_ACTION_TYPE_COUNT:
                        /* Only action translation that may fail (counter alloc). */
                        ret = flow_verbs_translate_action_count(dev_flow,
                                                                actions,
                                                                dev, error);
                        if (ret < 0)
                                return ret;
                        action_flags |= MLX5_FLOW_ACTION_COUNT;
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
                                                  actions,
                                                  "action not supported");
                }
        }
        dev_flow->act_flags = action_flags;
        /*
         * Second pass: translate each pattern item, accumulating layer
         * flags, RSS hash fields and the deepest-layer subpriority.
         */
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
                /* A tunnel item seen earlier makes later layers "inner". */
                int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);

                switch (items->type) {
                case RTE_FLOW_ITEM_TYPE_VOID:
                        break;
                case RTE_FLOW_ITEM_TYPE_ETH:
                        flow_verbs_translate_item_eth(dev_flow, items,
                                                      item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L2;
                        item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                                               MLX5_FLOW_LAYER_OUTER_L2;
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
                        flow_verbs_translate_item_vlan(dev_flow, items,
                                                       item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L2;
                        item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
                                                MLX5_FLOW_LAYER_INNER_VLAN) :
                                               (MLX5_FLOW_LAYER_OUTER_L2 |
                                                MLX5_FLOW_LAYER_OUTER_VLAN);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
                        flow_verbs_translate_item_ipv4(dev_flow, items,
                                                       item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L3;
                        dev_flow->hash_fields |=
                                mlx5_flow_hashfields_adjust
                                        (rss_desc, tunnel,
                                         MLX5_IPV4_LAYER_TYPES,
                                         MLX5_IPV4_IBV_RX_HASH);
                        item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
                                               MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
                        flow_verbs_translate_item_ipv6(dev_flow, items,
                                                       item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L3;
                        dev_flow->hash_fields |=
                                mlx5_flow_hashfields_adjust
                                        (rss_desc, tunnel,
                                         MLX5_IPV6_LAYER_TYPES,
                                         MLX5_IPV6_IBV_RX_HASH);
                        item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
                                               MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        break;
                case RTE_FLOW_ITEM_TYPE_TCP:
                        flow_verbs_translate_item_tcp(dev_flow, items,
                                                      item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L4;
                        dev_flow->hash_fields |=
                                mlx5_flow_hashfields_adjust
                                        (rss_desc, tunnel, ETH_RSS_TCP,
                                         (IBV_RX_HASH_SRC_PORT_TCP |
                                          IBV_RX_HASH_DST_PORT_TCP));
                        item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
                                               MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        break;
                case RTE_FLOW_ITEM_TYPE_UDP:
                        flow_verbs_translate_item_udp(dev_flow, items,
                                                      item_flags);
                        subpriority = MLX5_PRIORITY_MAP_L4;
                        dev_flow->hash_fields |=
                                mlx5_flow_hashfields_adjust
                                        (rss_desc, tunnel, ETH_RSS_UDP,
                                         (IBV_RX_HASH_SRC_PORT_UDP |
                                          IBV_RX_HASH_DST_PORT_UDP));
                        item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
                                               MLX5_FLOW_LAYER_OUTER_L4_UDP;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
                        flow_verbs_translate_item_vxlan(dev_flow, items,
                                                        item_flags);
                        /* Tunnel items use the RSS-dependent tunnel priority. */
                        subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        item_flags |= MLX5_FLOW_LAYER_VXLAN;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
                        flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
                                                            item_flags);
                        subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
                        break;
                case RTE_FLOW_ITEM_TYPE_GRE:
                        flow_verbs_translate_item_gre(dev_flow, items,
                                                      item_flags);
                        subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        item_flags |= MLX5_FLOW_LAYER_GRE;
                        break;
                case RTE_FLOW_ITEM_TYPE_MPLS:
                        flow_verbs_translate_item_mpls(dev_flow, items,
                                                       item_flags);
                        subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        item_flags |= MLX5_FLOW_LAYER_MPLS;
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "item not supported");
                }
        }
        dev_flow->handle->layers = item_flags;
        /* Other members of attr will be ignored. */
        dev_flow->verbs.attr.priority =
                mlx5_flow_adjust_priority(dev, priority, subpriority);
        dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
        return 0;
}
1881
1882 /**
1883  * Remove the flow from the NIC but keeps it in memory.
1884  *
1885  * @param[in] dev
1886  *   Pointer to the Ethernet device structure.
1887  * @param[in, out] flow
1888  *   Pointer to flow structure.
1889  */
static void
flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_handle *handle;
        uint32_t handle_idx;

        if (!flow)
                return;
        /* Walk every device handle linked to this flow in the indexed pool. */
        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       handle_idx, handle, next) {
                if (handle->drv_flow) {
                        /* Destroy the HW rule but keep the handle in memory. */
                        claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
                        handle->drv_flow = NULL;
                }
                /* hrxq is union, don't touch it only the flag is set. */
                if (handle->rix_hrxq &&
                    handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
                        mlx5_hrxq_release(dev, handle->rix_hrxq);
                        handle->rix_hrxq = 0;
                }
                /* Drop the VF VLAN workaround interface if one was created. */
                if (handle->vf_vlan.tag && handle->vf_vlan.created)
                        mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
        }
}
1915
1916 /**
1917  * Remove the flow from the NIC and the memory.
1918  *
1919  * @param[in] dev
1920  *   Pointer to the Ethernet device structure.
1921  * @param[in, out] flow
1922  *   Pointer to flow structure.
1923  */
static void
flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_handle *handle;

        if (!flow)
                return;
        /* Detach the rules from HW first, then free the handle list. */
        flow_verbs_remove(dev, flow);
        while (flow->dev_handles) {
                /* Pop the head handle index from the singly-linked list. */
                uint32_t tmp_idx = flow->dev_handles;

                handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
                                   tmp_idx);
                if (!handle)
                        return;
                flow->dev_handles = handle->next.next;
                mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
                           tmp_idx);
        }
        /* Release the flow counter last, after all handles are gone. */
        if (flow->counter) {
                flow_verbs_counter_release(dev, flow->counter);
                flow->counter = 0;
        }
}
1949
1950 /**
1951  * Apply the flow to the NIC.
1952  *
1953  * @param[in] dev
1954  *   Pointer to the Ethernet device structure.
1955  * @param[in, out] flow
1956  *   Pointer to flow structure.
1957  * @param[out] error
1958  *   Pointer to error structure.
1959  *
1960  * @return
1961  *   0 on success, a negative errno value otherwise and rte_errno is set.
1962  */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                 struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_handle *handle;
        struct mlx5_flow *dev_flow;
        struct mlx5_hrxq *hrxq;
        uint32_t dev_handles;
        int err;
        int idx;
        struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();

        MLX5_ASSERT(wks);
        /* Create a HW rule for every device flow staged in the workspace. */
        for (idx = wks->flow_idx - 1; idx >= 0; idx--) {
                dev_flow = &wks->flows[idx];
                handle = dev_flow->handle;
                if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
                        /* Drop flows share the device-wide drop hash Rx queue. */
                        MLX5_ASSERT(priv->drop_queue.hrxq);
                        hrxq = priv->drop_queue.hrxq;
                } else {
                        uint32_t hrxq_idx;
                        struct mlx5_flow_rss_desc *rss_desc = &wks->rss_desc;

                        /* Get (or create) a hash Rx queue for this RSS spec. */
                        MLX5_ASSERT(rss_desc->queue_num);
                        rss_desc->key_len = MLX5_RSS_HASH_KEY_LEN;
                        rss_desc->hash_fields = dev_flow->hash_fields;
                        rss_desc->tunnel = !!(handle->layers &
                                              MLX5_FLOW_LAYER_TUNNEL);
                        rss_desc->shared_rss = 0;
                        hrxq_idx = mlx5_hrxq_get(dev, rss_desc);
                        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                                              hrxq_idx);
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, rte_errno,
                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                         "cannot get hash queue");
                                goto error;
                        }
                        handle->rix_hrxq = hrxq_idx;
                }
                MLX5_ASSERT(hrxq);
                handle->drv_flow = mlx5_glue->create_flow
                                        (hrxq->qp, &dev_flow->verbs.attr);
                if (!handle->drv_flow) {
                        rte_flow_error_set(error, errno,
                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                           NULL,
                                           "hardware refuses to create flow");
                        goto error;
                }
                if (priv->vmwa_context &&
                    handle->vf_vlan.tag && !handle->vf_vlan.created) {
                        /*
                         * The rule contains the VLAN pattern.
                         * For VF we are going to create VLAN
                         * interface to make hypervisor set correct
                         * e-Switch vport context.
                         */
                        mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
                }
        }
        return 0;
error:
        err = rte_errno; /* Save rte_errno before cleanup. */
        /* Unwind: release resources of every handle already linked. */
        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       dev_handles, handle, next) {
                /* hrxq is union, don't touch it only the flag is set. */
                if (handle->rix_hrxq &&
                    handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
                        mlx5_hrxq_release(dev, handle->rix_hrxq);
                        handle->rix_hrxq = 0;
                }
                if (handle->vf_vlan.tag && handle->vf_vlan.created)
                        mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
        }
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
}
2043
2044 /**
2045  * Query a flow.
2046  *
2047  * @see rte_flow_query()
2048  * @see rte_flow_ops
2049  */
2050 static int
2051 flow_verbs_query(struct rte_eth_dev *dev,
2052                  struct rte_flow *flow,
2053                  const struct rte_flow_action *actions,
2054                  void *data,
2055                  struct rte_flow_error *error)
2056 {
2057         int ret = -EINVAL;
2058
2059         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2060                 switch (actions->type) {
2061                 case RTE_FLOW_ACTION_TYPE_VOID:
2062                         break;
2063                 case RTE_FLOW_ACTION_TYPE_COUNT:
2064                         ret = flow_verbs_counter_query(dev, flow, data, error);
2065                         break;
2066                 default:
2067                         return rte_flow_error_set(error, ENOTSUP,
2068                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2069                                                   actions,
2070                                                   "action not supported");
2071                 }
2072         }
2073         return ret;
2074 }
2075
2076 static int
2077 flow_verbs_sync_domain(struct rte_eth_dev *dev, uint32_t domains,
2078                        uint32_t flags)
2079 {
2080         RTE_SET_USED(dev);
2081         RTE_SET_USED(domains);
2082         RTE_SET_USED(flags);
2083
2084         return 0;
2085 }
2086
/* Verbs flow engine callback table, selected by the generic flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
        .validate = flow_verbs_validate,
        .prepare = flow_verbs_prepare,
        .translate = flow_verbs_translate,
        .apply = flow_verbs_apply,
        .remove = flow_verbs_remove,
        .destroy = flow_verbs_destroy,
        .query = flow_verbs_query,
        .sync_domain = flow_verbs_sync_domain,
};