net/mlx5: add flow sync API
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 Mellanox Technologies, Ltd
3  */
4
5 #include <netinet/in.h>
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 #include <rte_common.h>
12 #include <rte_ether.h>
13 #include <rte_ethdev_driver.h>
14 #include <rte_flow.h>
15 #include <rte_flow_driver.h>
16 #include <rte_malloc.h>
17 #include <rte_ip.h>
18
19 #include <mlx5_glue.h>
20 #include <mlx5_prm.h>
21 #include <mlx5_malloc.h>
22
23 #include "mlx5_defs.h"
24 #include "mlx5.h"
25 #include "mlx5_flow.h"
26 #include "mlx5_rxtx.h"
27
/*
 * Return the Verbs "inner" flag for a spec type when the pattern already
 * matched a tunnel layer, so the spec applies to the inner packet headers;
 * return 0 (outer) otherwise.
 */
#define VERBS_SPEC_INNER(item_flags) \
	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
30
/*
 * Map of Verbs to Flow priority with 8 Verbs priorities.
 * Indexed as [base priority][sub-priority] (see mlx5_flow_adjust_priority).
 */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/*
 * Map of Verbs to Flow priority with 16 Verbs priorities.
 * Indexed as [base priority][sub-priority] (see mlx5_flow_adjust_priority).
 */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};
41
42 /**
43  * Discover the maximum number of priority available.
44  *
45  * @param[in] dev
46  *   Pointer to the Ethernet device structure.
47  *
48  * @return
49  *   number of supported flow priority on success, a negative errno
50  *   value otherwise and rte_errno is set.
51  */
52 int
53 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
54 {
55         struct mlx5_priv *priv = dev->data->dev_private;
56         struct {
57                 struct ibv_flow_attr attr;
58                 struct ibv_flow_spec_eth eth;
59                 struct ibv_flow_spec_action_drop drop;
60         } flow_attr = {
61                 .attr = {
62                         .num_of_specs = 2,
63                         .port = (uint8_t)priv->dev_port,
64                 },
65                 .eth = {
66                         .type = IBV_FLOW_SPEC_ETH,
67                         .size = sizeof(struct ibv_flow_spec_eth),
68                 },
69                 .drop = {
70                         .size = sizeof(struct ibv_flow_spec_action_drop),
71                         .type = IBV_FLOW_SPEC_ACTION_DROP,
72                 },
73         };
74         struct ibv_flow *flow;
75         struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);
76         uint16_t vprio[] = { 8, 16 };
77         int i;
78         int priority = 0;
79
80         if (!drop) {
81                 rte_errno = ENOTSUP;
82                 return -rte_errno;
83         }
84         for (i = 0; i != RTE_DIM(vprio); i++) {
85                 flow_attr.attr.priority = vprio[i] - 1;
86                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
87                 if (!flow)
88                         break;
89                 claim_zero(mlx5_glue->destroy_flow(flow));
90                 priority = vprio[i];
91         }
92         mlx5_drop_action_destroy(dev);
93         switch (priority) {
94         case 8:
95                 priority = RTE_DIM(priority_map_3);
96                 break;
97         case 16:
98                 priority = RTE_DIM(priority_map_5);
99                 break;
100         default:
101                 rte_errno = ENOTSUP;
102                 DRV_LOG(ERR,
103                         "port %u verbs maximum priority: %d expected 8/16",
104                         dev->data->port_id, priority);
105                 return -rte_errno;
106         }
107         DRV_LOG(INFO, "port %u flow maximum priority: %d",
108                 dev->data->port_id, priority);
109         return priority;
110 }
111
112 /**
113  * Adjust flow priority based on the highest layer and the request priority.
114  *
115  * @param[in] dev
116  *   Pointer to the Ethernet device structure.
117  * @param[in] priority
118  *   The rule base priority.
119  * @param[in] subpriority
120  *   The priority based on the items.
121  *
122  * @return
123  *   The new priority.
124  */
125 uint32_t
126 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
127                                    uint32_t subpriority)
128 {
129         uint32_t res = 0;
130         struct mlx5_priv *priv = dev->data->dev_private;
131
132         switch (priv->config.flow_prio) {
133         case RTE_DIM(priority_map_3):
134                 res = priority_map_3[priority][subpriority];
135                 break;
136         case RTE_DIM(priority_map_5):
137                 res = priority_map_5[priority][subpriority];
138                 break;
139         }
140         return  res;
141 }
142
143 /**
144  * Get Verbs flow counter by index.
145  *
146  * @param[in] dev
147  *   Pointer to the Ethernet device structure.
148  * @param[in] idx
149  *   mlx5 flow counter index in the container.
150  * @param[out] ppool
151  *   mlx5 flow counter pool in the container,
152  *
153  * @return
154  *   A pointer to the counter, NULL otherwise.
155  */
156 static struct mlx5_flow_counter *
157 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
158                               uint32_t idx,
159                               struct mlx5_flow_counter_pool **ppool)
160 {
161         struct mlx5_priv *priv = dev->data->dev_private;
162         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
163         struct mlx5_flow_counter_pool *pool;
164
165         idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
166         pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
167         MLX5_ASSERT(pool);
168         if (ppool)
169                 *ppool = pool;
170         return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
171 }
172
/**
 * Create Verbs flow counter with Verbs library.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] counter
 *   mlx5 flow counter object, contains the counter id,
 *   handle of created Verbs flow counter is returned
 *   in dcs_when_free field (if counters are supported).
 *
 * @return
 *   0 On success else a negative errno value is returned
 *   and rte_errno is set.
 */
static int
flow_verbs_counter_create(struct rte_eth_dev *dev,
			  struct mlx5_flow_counter *counter)
{
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	/* Legacy (rdma-core v4.2) counter-set API. */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counter_set_init_attr init = {
			 .counter_set_id = counter->shared_info.id};

	counter->dcs_when_free = mlx5_glue->create_counter_set(ctx, &init);
	if (!counter->dcs_when_free) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	return 0;
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	/*
	 * Newer (rdma-core v4.5) counters API: create one object, then
	 * attach a packet counter at index 0 and a byte counter at index 1
	 * (matching the read order in flow_verbs_counter_query()).
	 */
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_counters_init_attr init = {0};
	struct ibv_counter_attach_attr attach;
	int ret;

	memset(&attach, 0, sizeof(attach));
	counter->dcs_when_free = mlx5_glue->create_counters(ctx, &init);
	if (!counter->dcs_when_free) {
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	ret = mlx5_glue->attach_counters(counter->dcs_when_free, &attach, NULL);
	if (!ret) {
		attach.counter_desc = IBV_COUNTER_BYTES;
		attach.index = 1;
		ret = mlx5_glue->attach_counters
					(counter->dcs_when_free, &attach, NULL);
	}
	if (ret) {
		/* Attach failed: release the half-initialized object. */
		claim_zero(mlx5_glue->destroy_counters(counter->dcs_when_free));
		counter->dcs_when_free = NULL;
		rte_errno = ret;
		return -ret;
	}
	return 0;
#else
	/* Flow counters not compiled in. */
	(void)dev;
	(void)counter;
	rte_errno = ENOTSUP;
	return -ENOTSUP;
#endif
}
239
240 /**
241  * Get a flow counter.
242  *
243  * @param[in] dev
244  *   Pointer to the Ethernet device structure.
245  * @param[in] shared
246  *   Indicate if this counter is shared with other flows.
247  * @param[in] id
248  *   Counter identifier.
249  *
250  * @return
251  *   Index to the counter, 0 otherwise and rte_errno is set.
252  */
253 static uint32_t
254 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
255 {
256         struct mlx5_priv *priv = dev->data->dev_private;
257         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
258         struct mlx5_flow_counter_pool *pool = NULL;
259         struct mlx5_flow_counter *cnt = NULL;
260         union mlx5_l3t_data data;
261         uint32_t n_valid = cmng->n_valid;
262         uint32_t pool_idx, cnt_idx;
263         uint32_t i;
264         int ret;
265
266         if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) &&
267             data.dword)
268                 return data.dword;
269         for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
270                 pool = cmng->pools[pool_idx];
271                 if (!pool)
272                         continue;
273                 cnt = TAILQ_FIRST(&pool->counters[0]);
274                 if (cnt)
275                         break;
276         }
277         if (!cnt) {
278                 struct mlx5_flow_counter_pool **pools;
279                 uint32_t size;
280
281                 if (n_valid == cmng->n) {
282                         /* Resize the container pool array. */
283                         size = sizeof(struct mlx5_flow_counter_pool *) *
284                                      (n_valid + MLX5_CNT_CONTAINER_RESIZE);
285                         pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
286                                             SOCKET_ID_ANY);
287                         if (!pools)
288                                 return 0;
289                         if (n_valid) {
290                                 memcpy(pools, cmng->pools,
291                                        sizeof(struct mlx5_flow_counter_pool *) *
292                                        n_valid);
293                                 mlx5_free(cmng->pools);
294                         }
295                         cmng->pools = pools;
296                         cmng->n += MLX5_CNT_CONTAINER_RESIZE;
297                 }
298                 /* Allocate memory for new pool*/
299                 size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
300                 pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
301                 if (!pool)
302                         return 0;
303                 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
304                         cnt = MLX5_POOL_GET_CNT(pool, i);
305                         TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
306                 }
307                 cnt = MLX5_POOL_GET_CNT(pool, 0);
308                 cmng->pools[n_valid] = pool;
309                 pool_idx = n_valid;
310                 cmng->n_valid++;
311         }
312         TAILQ_REMOVE(&pool->counters[0], cnt, next);
313         i = MLX5_CNT_ARRAY_IDX(pool, cnt);
314         cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
315         if (shared) {
316                 data.dword = cnt_idx;
317                 if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data))
318                         return 0;
319                 cnt->shared_info.id = id;
320                 cnt_idx |= MLX5_CNT_SHARED_OFFSET;
321         }
322         /* Create counter with Verbs. */
323         ret = flow_verbs_counter_create(dev, cnt);
324         if (!ret) {
325                 cnt->dcs_when_active = cnt->dcs_when_free;
326                 cnt->hits = 0;
327                 cnt->bytes = 0;
328                 return cnt_idx;
329         }
330         TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
331         /* Some error occurred in Verbs library. */
332         rte_errno = -ret;
333         return 0;
334 }
335
/**
 * Release a flow counter.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] counter
 *   Index to the counter handler.
 */
static void
flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt;

	cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool);
	/*
	 * NOTE(review): for shared counters the early return on a non-zero
	 * mlx5_l3t_clear_entry() presumably means other flows still reference
	 * this id, so the Verbs object must stay alive — confirm against the
	 * l3t implementation.
	 */
	if (IS_SHARED_CNT(counter) &&
	    mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
		return;
	/* Destroy the Verbs object, then return the counter to the free list. */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	claim_zero(mlx5_glue->destroy_counter_set
			((struct ibv_counter_set *)cnt->dcs_when_active));
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	claim_zero(mlx5_glue->destroy_counters
				((struct ibv_counters *)cnt->dcs_when_active));
#endif
	TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
}
364
365 /**
366  * Query a flow counter via Verbs library call.
367  *
368  * @see rte_flow_query()
369  * @see rte_flow_ops
370  */
371 static int
372 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
373                          struct rte_flow *flow, void *data,
374                          struct rte_flow_error *error)
375 {
376 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
377         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
378         if (flow->counter) {
379                 struct mlx5_flow_counter_pool *pool;
380                 struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
381                                                 (dev, flow->counter, &pool);
382                 struct rte_flow_query_count *qc = data;
383                 uint64_t counters[2] = {0, 0};
384 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
385                 struct ibv_query_counter_set_attr query_cs_attr = {
386                         .dcs_when_free = (struct ibv_counter_set *)
387                                                 cnt->dcs_when_active,
388                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
389                 };
390                 struct ibv_counter_set_data query_out = {
391                         .out = counters,
392                         .outlen = 2 * sizeof(uint64_t),
393                 };
394                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
395                                                        &query_out);
396 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
397                 int err = mlx5_glue->query_counters
398                         ((struct ibv_counters *)cnt->dcs_when_active, counters,
399                                 RTE_DIM(counters),
400                                 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
401 #endif
402                 if (err)
403                         return rte_flow_error_set
404                                 (error, err,
405                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
406                                  NULL,
407                                  "cannot read counter");
408                 qc->hits_set = 1;
409                 qc->bytes_set = 1;
410                 qc->hits = counters[0] - cnt->hits;
411                 qc->bytes = counters[1] - cnt->bytes;
412                 if (qc->reset) {
413                         cnt->hits = counters[0];
414                         cnt->bytes = counters[1];
415                 }
416                 return 0;
417         }
418         return rte_flow_error_set(error, EINVAL,
419                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
420                                   NULL,
421                                   "flow does not have counter");
422 #else
423         (void)flow;
424         (void)data;
425         return rte_flow_error_set(error, ENOTSUP,
426                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
427                                   NULL,
428                                   "counters are not available");
429 #endif
430 }
431
432 /**
433  * Add a verbs item specification into @p verbs.
434  *
435  * @param[out] verbs
436  *   Pointer to verbs structure.
437  * @param[in] src
438  *   Create specification.
439  * @param[in] size
440  *   Size in bytes of the specification to copy.
441  */
442 static void
443 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
444                     void *src, unsigned int size)
445 {
446         void *dst;
447
448         if (!verbs)
449                 return;
450         MLX5_ASSERT(verbs->specs);
451         dst = (void *)(verbs->specs + verbs->size);
452         memcpy(dst, src, size);
453         ++verbs->attr.num_of_specs;
454         verbs->size += size;
455 }
456
457 /**
458  * Convert the @p item into a Verbs specification. This function assumes that
459  * the input is valid and that there is space to insert the requested item
460  * into the flow.
461  *
462  * @param[in, out] dev_flow
463  *   Pointer to dev_flow structure.
464  * @param[in] item
465  *   Item specification.
466  * @param[in] item_flags
467  *   Parsed item flags.
468  */
469 static void
470 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
471                               const struct rte_flow_item *item,
472                               uint64_t item_flags)
473 {
474         const struct rte_flow_item_eth *spec = item->spec;
475         const struct rte_flow_item_eth *mask = item->mask;
476         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
477         struct ibv_flow_spec_eth eth = {
478                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
479                 .size = size,
480         };
481
482         if (!mask)
483                 mask = &rte_flow_item_eth_mask;
484         if (spec) {
485                 unsigned int i;
486
487                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
488                         RTE_ETHER_ADDR_LEN);
489                 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
490                         RTE_ETHER_ADDR_LEN);
491                 eth.val.ether_type = spec->type;
492                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
493                         RTE_ETHER_ADDR_LEN);
494                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
495                         RTE_ETHER_ADDR_LEN);
496                 eth.mask.ether_type = mask->type;
497                 /* Remove unwanted bits from values. */
498                 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
499                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
500                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
501                 }
502                 eth.val.ether_type &= eth.mask.ether_type;
503         }
504         flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
505 }
506
507 /**
508  * Update the VLAN tag in the Verbs Ethernet specification.
509  * This function assumes that the input is valid and there is space to add
510  * the requested item.
511  *
512  * @param[in, out] attr
513  *   Pointer to Verbs attributes structure.
514  * @param[in] eth
515  *   Verbs structure containing the VLAN information to copy.
516  */
517 static void
518 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
519                             struct ibv_flow_spec_eth *eth)
520 {
521         unsigned int i;
522         const enum ibv_flow_spec_type search = eth->type;
523         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
524                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
525
526         for (i = 0; i != attr->num_of_specs; ++i) {
527                 if (hdr->type == search) {
528                         struct ibv_flow_spec_eth *e =
529                                 (struct ibv_flow_spec_eth *)hdr;
530
531                         e->val.vlan_tag = eth->val.vlan_tag;
532                         e->mask.vlan_tag = eth->mask.vlan_tag;
533                         e->val.ether_type = eth->val.ether_type;
534                         e->mask.ether_type = eth->mask.ether_type;
535                         break;
536                 }
537                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
538         }
539 }
540
541 /**
542  * Convert the @p item into a Verbs specification. This function assumes that
543  * the input is valid and that there is space to insert the requested item
544  * into the flow.
545  *
546  * @param[in, out] dev_flow
547  *   Pointer to dev_flow structure.
548  * @param[in] item
549  *   Item specification.
550  * @param[in] item_flags
551  *   Parsed item flags.
552  */
553 static void
554 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
555                                const struct rte_flow_item *item,
556                                uint64_t item_flags)
557 {
558         const struct rte_flow_item_vlan *spec = item->spec;
559         const struct rte_flow_item_vlan *mask = item->mask;
560         unsigned int size = sizeof(struct ibv_flow_spec_eth);
561         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
562         struct ibv_flow_spec_eth eth = {
563                 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
564                 .size = size,
565         };
566         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
567                                       MLX5_FLOW_LAYER_OUTER_L2;
568
569         if (!mask)
570                 mask = &rte_flow_item_vlan_mask;
571         if (spec) {
572                 eth.val.vlan_tag = spec->tci;
573                 eth.mask.vlan_tag = mask->tci;
574                 eth.val.vlan_tag &= eth.mask.vlan_tag;
575                 eth.val.ether_type = spec->inner_type;
576                 eth.mask.ether_type = mask->inner_type;
577                 eth.val.ether_type &= eth.mask.ether_type;
578         }
579         if (!(item_flags & l2m))
580                 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
581         else
582                 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
583         if (!tunnel)
584                 dev_flow->handle->vf_vlan.tag =
585                         rte_be_to_cpu_16(spec->tci) & 0x0fff;
586 }
587
588 /**
589  * Convert the @p item into a Verbs specification. This function assumes that
590  * the input is valid and that there is space to insert the requested item
591  * into the flow.
592  *
593  * @param[in, out] dev_flow
594  *   Pointer to dev_flow structure.
595  * @param[in] item
596  *   Item specification.
597  * @param[in] item_flags
598  *   Parsed item flags.
599  */
600 static void
601 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
602                                const struct rte_flow_item *item,
603                                uint64_t item_flags)
604 {
605         const struct rte_flow_item_ipv4 *spec = item->spec;
606         const struct rte_flow_item_ipv4 *mask = item->mask;
607         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
608         struct ibv_flow_spec_ipv4_ext ipv4 = {
609                 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
610                 .size = size,
611         };
612
613         if (!mask)
614                 mask = &rte_flow_item_ipv4_mask;
615         if (spec) {
616                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
617                         .src_ip = spec->hdr.src_addr,
618                         .dst_ip = spec->hdr.dst_addr,
619                         .proto = spec->hdr.next_proto_id,
620                         .tos = spec->hdr.type_of_service,
621                 };
622                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
623                         .src_ip = mask->hdr.src_addr,
624                         .dst_ip = mask->hdr.dst_addr,
625                         .proto = mask->hdr.next_proto_id,
626                         .tos = mask->hdr.type_of_service,
627                 };
628                 /* Remove unwanted bits from values. */
629                 ipv4.val.src_ip &= ipv4.mask.src_ip;
630                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
631                 ipv4.val.proto &= ipv4.mask.proto;
632                 ipv4.val.tos &= ipv4.mask.tos;
633         }
634         flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
635 }
636
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
			       const struct rte_flow_item *item,
			       uint64_t item_flags)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		/*
		 * Split the combined version/TC/flow-label word into the
		 * separate Verbs fields, converting to host order for the
		 * shift/mask and back to big endian for the flow label.
		 */
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
					 RTE_IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
					 RTE_IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
					  RTE_IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
	}
	flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
}
702
703 /**
704  * Convert the @p item into a Verbs specification. This function assumes that
705  * the input is valid and that there is space to insert the requested item
706  * into the flow.
707  *
708  * @param[in, out] dev_flow
709  *   Pointer to dev_flow structure.
710  * @param[in] item
711  *   Item specification.
712  * @param[in] item_flags
713  *   Parsed item flags.
714  */
715 static void
716 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
717                               const struct rte_flow_item *item,
718                               uint64_t item_flags __rte_unused)
719 {
720         const struct rte_flow_item_tcp *spec = item->spec;
721         const struct rte_flow_item_tcp *mask = item->mask;
722         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
723         struct ibv_flow_spec_tcp_udp tcp = {
724                 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
725                 .size = size,
726         };
727
728         if (!mask)
729                 mask = &rte_flow_item_tcp_mask;
730         if (spec) {
731                 tcp.val.dst_port = spec->hdr.dst_port;
732                 tcp.val.src_port = spec->hdr.src_port;
733                 tcp.mask.dst_port = mask->hdr.dst_port;
734                 tcp.mask.src_port = mask->hdr.src_port;
735                 /* Remove unwanted bits from values. */
736                 tcp.val.src_port &= tcp.mask.src_port;
737                 tcp.val.dst_port &= tcp.mask.dst_port;
738         }
739         flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
740 }
741
742 /**
743  * Convert the @p item into a Verbs specification. This function assumes that
744  * the input is valid and that there is space to insert the requested item
745  * into the flow.
746  *
747  * @param[in, out] dev_flow
748  *   Pointer to dev_flow structure.
749  * @param[in] item
750  *   Item specification.
751  * @param[in] item_flags
752  *   Parsed item flags.
753  */
754 static void
755 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
756                               const struct rte_flow_item *item,
757                               uint64_t item_flags __rte_unused)
758 {
759         const struct rte_flow_item_udp *spec = item->spec;
760         const struct rte_flow_item_udp *mask = item->mask;
761         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
762         struct ibv_flow_spec_tcp_udp udp = {
763                 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
764                 .size = size,
765         };
766
767         if (!mask)
768                 mask = &rte_flow_item_udp_mask;
769         if (spec) {
770                 udp.val.dst_port = spec->hdr.dst_port;
771                 udp.val.src_port = spec->hdr.src_port;
772                 udp.mask.dst_port = mask->hdr.dst_port;
773                 udp.mask.src_port = mask->hdr.src_port;
774                 /* Remove unwanted bits from values. */
775                 udp.val.src_port &= udp.mask.src_port;
776                 udp.val.dst_port &= udp.mask.dst_port;
777         }
778         item++;
779         while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
780                 item++;
781         if (!(udp.val.dst_port & udp.mask.dst_port)) {
782                 switch ((item)->type) {
783                 case RTE_FLOW_ITEM_TYPE_VXLAN:
784                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
785                         udp.mask.dst_port = 0xffff;
786                         break;
787                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
788                         udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
789                         udp.mask.dst_port = 0xffff;
790                         break;
791                 case RTE_FLOW_ITEM_TYPE_MPLS:
792                         udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
793                         udp.mask.dst_port = 0xffff;
794                         break;
795                 default:
796                         break;
797                 }
798         }
799
800         flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
801 }
802
803 /**
804  * Convert the @p item into a Verbs specification. This function assumes that
805  * the input is valid and that there is space to insert the requested item
806  * into the flow.
807  *
808  * @param[in, out] dev_flow
809  *   Pointer to dev_flow structure.
810  * @param[in] item
811  *   Item specification.
812  * @param[in] item_flags
813  *   Parsed item flags.
814  */
815 static void
816 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
817                                 const struct rte_flow_item *item,
818                                 uint64_t item_flags __rte_unused)
819 {
820         const struct rte_flow_item_vxlan *spec = item->spec;
821         const struct rte_flow_item_vxlan *mask = item->mask;
822         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
823         struct ibv_flow_spec_tunnel vxlan = {
824                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
825                 .size = size,
826         };
827         union vni {
828                 uint32_t vlan_id;
829                 uint8_t vni[4];
830         } id = { .vlan_id = 0, };
831
832         if (!mask)
833                 mask = &rte_flow_item_vxlan_mask;
834         if (spec) {
835                 memcpy(&id.vni[1], spec->vni, 3);
836                 vxlan.val.tunnel_id = id.vlan_id;
837                 memcpy(&id.vni[1], mask->vni, 3);
838                 vxlan.mask.tunnel_id = id.vlan_id;
839                 /* Remove unwanted bits from values. */
840                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
841         }
842         flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
843 }
844
845 /**
846  * Convert the @p item into a Verbs specification. This function assumes that
847  * the input is valid and that there is space to insert the requested item
848  * into the flow.
849  *
850  * @param[in, out] dev_flow
851  *   Pointer to dev_flow structure.
852  * @param[in] item
853  *   Item specification.
854  * @param[in] item_flags
855  *   Parsed item flags.
856  */
857 static void
858 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
859                                     const struct rte_flow_item *item,
860                                     uint64_t item_flags __rte_unused)
861 {
862         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
863         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
864         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
865         struct ibv_flow_spec_tunnel vxlan_gpe = {
866                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
867                 .size = size,
868         };
869         union vni {
870                 uint32_t vlan_id;
871                 uint8_t vni[4];
872         } id = { .vlan_id = 0, };
873
874         if (!mask)
875                 mask = &rte_flow_item_vxlan_gpe_mask;
876         if (spec) {
877                 memcpy(&id.vni[1], spec->vni, 3);
878                 vxlan_gpe.val.tunnel_id = id.vlan_id;
879                 memcpy(&id.vni[1], mask->vni, 3);
880                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
881                 /* Remove unwanted bits from values. */
882                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
883         }
884         flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
885 }
886
887 /**
888  * Update the protocol in Verbs IPv4/IPv6 spec.
889  *
890  * @param[in, out] attr
891  *   Pointer to Verbs attributes structure.
892  * @param[in] search
893  *   Specification type to search in order to update the IP protocol.
894  * @param[in] protocol
895  *   Protocol value to set if none is present in the specification.
896  */
897 static void
898 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
899                                        enum ibv_flow_spec_type search,
900                                        uint8_t protocol)
901 {
902         unsigned int i;
903         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
904                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
905
906         if (!attr)
907                 return;
908         for (i = 0; i != attr->num_of_specs; ++i) {
909                 if (hdr->type == search) {
910                         union {
911                                 struct ibv_flow_spec_ipv4_ext *ipv4;
912                                 struct ibv_flow_spec_ipv6 *ipv6;
913                         } ip;
914
915                         switch (search) {
916                         case IBV_FLOW_SPEC_IPV4_EXT:
917                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
918                                 if (!ip.ipv4->val.proto) {
919                                         ip.ipv4->val.proto = protocol;
920                                         ip.ipv4->mask.proto = 0xff;
921                                 }
922                                 break;
923                         case IBV_FLOW_SPEC_IPV6:
924                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
925                                 if (!ip.ipv6->val.next_hdr) {
926                                         ip.ipv6->val.next_hdr = protocol;
927                                         ip.ipv6->mask.next_hdr = 0xff;
928                                 }
929                                 break;
930                         default:
931                                 break;
932                         }
933                         break;
934                 }
935                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
936         }
937 }
938
/**
 * Convert the @p item into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested item
 * into the flow.
 *
 * When rdma-core lacks MPLS support there is no dedicated GRE spec either,
 * so matching degrades to a generic tunnel spec and the GRE header fields
 * of the item are ignored.
 *
 * @param[in, out] dev_flow
 *   Pointer to dev_flow structure.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Parsed item flags.
 */
static void
flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
			      const struct rte_flow_item *item __rte_unused,
			      uint64_t item_flags)
{
	struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#else
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};

	if (!mask)
		mask = &rte_flow_item_gre_mask;
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		/*
		 * NOTE(review): tunnel.val.key is masked here but never
		 * copied from the item, so GRE key matching appears
		 * unsupported by this path — confirm intent.
		 */
		tunnel.val.key &= tunnel.mask.key;
	}
#endif
	/*
	 * GRE is carried over IP protocol 47; fill it into the previously
	 * translated outer IPv4/IPv6 spec if the user left it unspecified.
	 */
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV4_EXT,
						       IPPROTO_GRE);
	else
		flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
						       IBV_FLOW_SPEC_IPV6,
						       IPPROTO_GRE);
	flow_verbs_spec_add(verbs, &tunnel, size);
}
995
996 /**
997  * Convert the @p action into a Verbs specification. This function assumes that
998  * the input is valid and that there is space to insert the requested action
999  * into the flow. This function also return the action that was added.
1000  *
1001  * @param[in, out] dev_flow
1002  *   Pointer to dev_flow structure.
1003  * @param[in] item
1004  *   Item specification.
1005  * @param[in] item_flags
1006  *   Parsed item flags.
1007  */
1008 static void
1009 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
1010                                const struct rte_flow_item *item __rte_unused,
1011                                uint64_t item_flags __rte_unused)
1012 {
1013 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1014         const struct rte_flow_item_mpls *spec = item->spec;
1015         const struct rte_flow_item_mpls *mask = item->mask;
1016         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1017         struct ibv_flow_spec_mpls mpls = {
1018                 .type = IBV_FLOW_SPEC_MPLS,
1019                 .size = size,
1020         };
1021
1022         if (!mask)
1023                 mask = &rte_flow_item_mpls_mask;
1024         if (spec) {
1025                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1026                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1027                 /* Remove unwanted bits from values.  */
1028                 mpls.val.label &= mpls.mask.label;
1029         }
1030         flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
1031 #endif
1032 }
1033
1034 /**
1035  * Convert the @p action into a Verbs specification. This function assumes that
1036  * the input is valid and that there is space to insert the requested action
1037  * into the flow.
1038  *
1039  * @param[in] dev_flow
1040  *   Pointer to mlx5_flow.
1041  * @param[in] action
1042  *   Action configuration.
1043  */
1044 static void
1045 flow_verbs_translate_action_drop
1046         (struct mlx5_flow *dev_flow,
1047          const struct rte_flow_action *action __rte_unused)
1048 {
1049         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1050         struct ibv_flow_spec_action_drop drop = {
1051                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1052                         .size = size,
1053         };
1054
1055         flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
1056 }
1057
1058 /**
1059  * Convert the @p action into a Verbs specification. This function assumes that
1060  * the input is valid and that there is space to insert the requested action
1061  * into the flow.
1062  *
1063  * @param[in] rss_desc
1064  *   Pointer to mlx5_flow_rss_desc.
1065  * @param[in] action
1066  *   Action configuration.
1067  */
1068 static void
1069 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
1070                                   const struct rte_flow_action *action)
1071 {
1072         const struct rte_flow_action_queue *queue = action->conf;
1073
1074         rss_desc->queue[0] = queue->index;
1075         rss_desc->queue_num = 1;
1076 }
1077
1078 /**
1079  * Convert the @p action into a Verbs specification. This function assumes that
1080  * the input is valid and that there is space to insert the requested action
1081  * into the flow.
1082  *
1083  * @param[in] rss_desc
1084  *   Pointer to mlx5_flow_rss_desc.
1085  * @param[in] action
1086  *   Action configuration.
1087  */
1088 static void
1089 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
1090                                 const struct rte_flow_action *action)
1091 {
1092         const struct rte_flow_action_rss *rss = action->conf;
1093         const uint8_t *rss_key;
1094
1095         memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1096         rss_desc->queue_num = rss->queue_num;
1097         /* NULL RSS key indicates default RSS key. */
1098         rss_key = !rss->key ? rss_hash_default_key : rss->key;
1099         memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1100         /*
1101          * rss->level and rss.types should be set in advance when expanding
1102          * items for RSS.
1103          */
1104 }
1105
1106 /**
1107  * Convert the @p action into a Verbs specification. This function assumes that
1108  * the input is valid and that there is space to insert the requested action
1109  * into the flow.
1110  *
1111  * @param[in] dev_flow
1112  *   Pointer to mlx5_flow.
1113  * @param[in] action
1114  *   Action configuration.
1115  */
1116 static void
1117 flow_verbs_translate_action_flag
1118         (struct mlx5_flow *dev_flow,
1119          const struct rte_flow_action *action __rte_unused)
1120 {
1121         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1122         struct ibv_flow_spec_action_tag tag = {
1123                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1124                 .size = size,
1125                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1126         };
1127
1128         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1129 }
1130
1131 /**
1132  * Convert the @p action into a Verbs specification. This function assumes that
1133  * the input is valid and that there is space to insert the requested action
1134  * into the flow.
1135  *
1136  * @param[in] dev_flow
1137  *   Pointer to mlx5_flow.
1138  * @param[in] action
1139  *   Action configuration.
1140  */
1141 static void
1142 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1143                                  const struct rte_flow_action *action)
1144 {
1145         const struct rte_flow_action_mark *mark = action->conf;
1146         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1147         struct ibv_flow_spec_action_tag tag = {
1148                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1149                 .size = size,
1150                 .tag_id = mlx5_flow_mark_set(mark->id),
1151         };
1152
1153         flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1154 }
1155
/**
 * Convert the @p action into a Verbs specification. This function assumes that
 * the input is valid and that there is space to insert the requested action
 * into the flow.
 *
 * When neither counters-set ABI (v42/v45) is available, the counter object is
 * still allocated but no Verbs spec is emitted.
 *
 * @param[in] dev_flow
 *   Pointer to mlx5_flow.
 * @param[in] action
 *   Action configuration.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 On success else a negative errno value is returned and rte_errno is set.
 */
static int
flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
				  const struct rte_flow_action *action,
				  struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	const struct rte_flow_action_count *count = action->conf;
	struct rte_flow *flow = dev_flow->flow;
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
	defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	struct mlx5_flow_counter_pool *pool;
	struct mlx5_flow_counter *cnt = NULL;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
	};
#endif

	/* Allocate (or look up a shared) counter lazily on first use. */
	if (!flow->counter) {
		flow->counter = flow_verbs_counter_new(dev, count->shared,
						       count->id);
		if (!flow->counter)
			return rte_flow_error_set(error, rte_errno,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  action,
						  "cannot get counter"
						  " context.");
	}
	/* The counter spec layout differs between rdma-core ABI versions. */
#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	counter.counter_set_handle =
		((struct ibv_counter_set *)cnt->dcs_when_active)->handle;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
	counter.counters = (struct ibv_counters *)cnt->dcs_when_active;
	flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
#endif
	return 0;
}
1214
/**
 * Internal validation function. For validating both actions and items.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[in] hairpin
 *   Number of hairpin TX actions, 0 means classic flow.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_validate(struct rte_eth_dev *dev,
		    const struct rte_flow_attr *attr,
		    const struct rte_flow_item items[],
		    const struct rte_flow_action actions[],
		    bool external __rte_unused,
		    int hairpin __rte_unused,
		    struct rte_flow_error *error)
{
	int ret;
	uint64_t action_flags = 0;
	uint64_t item_flags = 0;
	uint64_t last_item = 0;
	/* 0xff means "no L3 constraint on the following L4 item". */
	uint8_t next_protocol = 0xff;
	uint16_t ether_type = 0;
	char errstr[32];

	if (items == NULL)
		return -1;
	ret = mlx5_flow_validate_attributes(dev, attr, error);
	if (ret < 0)
		return ret;
	/* First pass: validate each item and accumulate layer flags. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
		int ret = 0;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_validate_item_eth(items, item_flags,
							  false, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
					     MLX5_FLOW_LAYER_OUTER_L2;
			/* Record the masked EtherType for L3 validation. */
			if (items->mask != NULL && items->spec != NULL) {
				ether_type =
					((const struct rte_flow_item_eth *)
					 items->spec)->type;
				ether_type &=
					((const struct rte_flow_item_eth *)
					 items->mask)->type;
				ether_type = rte_be_to_cpu_16(ether_type);
			} else {
				ether_type = 0;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_validate_item_vlan(items, item_flags,
							   dev, error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
					      MLX5_FLOW_LAYER_INNER_VLAN) :
					     (MLX5_FLOW_LAYER_OUTER_L2 |
					      MLX5_FLOW_LAYER_OUTER_VLAN);
			/* VLAN's inner_type supersedes the Ethernet type. */
			if (items->mask != NULL && items->spec != NULL) {
				ether_type =
					((const struct rte_flow_item_vlan *)
					 items->spec)->inner_type;
				ether_type &=
					((const struct rte_flow_item_vlan *)
					 items->mask)->inner_type;
				ether_type = rte_be_to_cpu_16(ether_type);
			} else {
				ether_type = 0;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_validate_item_ipv4
						(items, item_flags,
						 last_item, ether_type, NULL,
						 MLX5_ITEM_RANGE_NOT_ACCEPTED,
						 error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			/* Record the masked IP protocol for L4 validation. */
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv4 *)
			     items->mask)->hdr.next_proto_id) {
				next_protocol =
					((const struct rte_flow_item_ipv4 *)
					 (items->spec))->hdr.next_proto_id;
				next_protocol &=
					((const struct rte_flow_item_ipv4 *)
					 (items->mask))->hdr.next_proto_id;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
							   last_item,
							   ether_type, NULL,
							   error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
					     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			if (items->mask != NULL &&
			    ((const struct rte_flow_item_ipv6 *)
			     items->mask)->hdr.proto) {
				next_protocol =
					((const struct rte_flow_item_ipv6 *)
					 items->spec)->hdr.proto;
				next_protocol &=
					((const struct rte_flow_item_ipv6 *)
					 items->mask)->hdr.proto;
			} else {
				/* Reset for inner layer. */
				next_protocol = 0xff;
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_validate_item_udp(items, item_flags,
							  next_protocol,
							  error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
					     MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_validate_item_tcp
						(items, item_flags,
						 next_protocol,
						 &rte_flow_item_tcp_mask,
						 error);
			if (ret < 0)
				return ret;
			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
					     MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN:
			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
							    error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
			ret = mlx5_flow_validate_item_vxlan_gpe(items,
								item_flags,
								dev, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
			break;
		case RTE_FLOW_ITEM_TYPE_GRE:
			ret = mlx5_flow_validate_item_gre(items, item_flags,
							  next_protocol, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_GRE;
			break;
		case RTE_FLOW_ITEM_TYPE_MPLS:
			ret = mlx5_flow_validate_item_mpls(dev, items,
							   item_flags,
							   last_item, error);
			if (ret < 0)
				return ret;
			last_item = MLX5_FLOW_LAYER_MPLS;
			break;
		default:
			snprintf(errstr, sizeof(errstr), "item type %d not supported",
				 items->type);
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, errstr);
		}
		item_flags |= last_item;
	}
	/* Second pass: validate each action against the accumulated state. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_validate_action_flag(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_FLAG;
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_validate_action_mark(actions,
							     action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_MARK;
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_validate_action_drop(action_flags,
							     attr,
							     error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_DROP;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_validate_action_queue(actions,
							      action_flags, dev,
							      attr,
							      error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_QUEUE;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_validate_action_rss(actions,
							    action_flags, dev,
							    attr, item_flags,
							    error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_RSS;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			ret = mlx5_flow_validate_action_count(dev, attr, error);
			if (ret < 0)
				return ret;
			action_flags |= MLX5_FLOW_ACTION_COUNT;
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	/*
	 * Validate the drop action mutual exclusion with other actions.
	 * Drop action is mutually-exclusive with any other action, except for
	 * Count action.
	 */
	if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
	    (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "Drop action is mutually-exclusive "
					  "with any other action, except for "
					  "Count action");
	/* Every flow must decide the fate of matching packets. */
	if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, actions,
					  "no fate action is found");
	return 0;
}
1489
1490 /**
1491  * Calculate the required bytes that are needed for the action part of the verbs
1492  * flow.
1493  *
1494  * @param[in] actions
1495  *   Pointer to the list of actions.
1496  *
1497  * @return
1498  *   The size of the memory needed for all actions.
1499  */
1500 static int
1501 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1502 {
1503         int size = 0;
1504
1505         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1506                 switch (actions->type) {
1507                 case RTE_FLOW_ACTION_TYPE_VOID:
1508                         break;
1509                 case RTE_FLOW_ACTION_TYPE_FLAG:
1510                         size += sizeof(struct ibv_flow_spec_action_tag);
1511                         break;
1512                 case RTE_FLOW_ACTION_TYPE_MARK:
1513                         size += sizeof(struct ibv_flow_spec_action_tag);
1514                         break;
1515                 case RTE_FLOW_ACTION_TYPE_DROP:
1516                         size += sizeof(struct ibv_flow_spec_action_drop);
1517                         break;
1518                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1519                         break;
1520                 case RTE_FLOW_ACTION_TYPE_RSS:
1521                         break;
1522                 case RTE_FLOW_ACTION_TYPE_COUNT:
1523 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1524         defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1525                         size += sizeof(struct ibv_flow_spec_counter_action);
1526 #endif
1527                         break;
1528                 default:
1529                         break;
1530                 }
1531         }
1532         return size;
1533 }
1534
1535 /**
1536  * Calculate the required bytes that are needed for the item part of the verbs
1537  * flow.
1538  *
1539  * @param[in] items
1540  *   Pointer to the list of items.
1541  *
1542  * @return
1543  *   The size of the memory needed for all items.
1544  */
1545 static int
1546 flow_verbs_get_items_size(const struct rte_flow_item items[])
1547 {
1548         int size = 0;
1549
1550         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1551                 switch (items->type) {
1552                 case RTE_FLOW_ITEM_TYPE_VOID:
1553                         break;
1554                 case RTE_FLOW_ITEM_TYPE_ETH:
1555                         size += sizeof(struct ibv_flow_spec_eth);
1556                         break;
1557                 case RTE_FLOW_ITEM_TYPE_VLAN:
1558                         size += sizeof(struct ibv_flow_spec_eth);
1559                         break;
1560                 case RTE_FLOW_ITEM_TYPE_IPV4:
1561                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
1562                         break;
1563                 case RTE_FLOW_ITEM_TYPE_IPV6:
1564                         size += sizeof(struct ibv_flow_spec_ipv6);
1565                         break;
1566                 case RTE_FLOW_ITEM_TYPE_UDP:
1567                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1568                         break;
1569                 case RTE_FLOW_ITEM_TYPE_TCP:
1570                         size += sizeof(struct ibv_flow_spec_tcp_udp);
1571                         break;
1572                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1573                         size += sizeof(struct ibv_flow_spec_tunnel);
1574                         break;
1575                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1576                         size += sizeof(struct ibv_flow_spec_tunnel);
1577                         break;
1578 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1579                 case RTE_FLOW_ITEM_TYPE_GRE:
1580                         size += sizeof(struct ibv_flow_spec_gre);
1581                         break;
1582                 case RTE_FLOW_ITEM_TYPE_MPLS:
1583                         size += sizeof(struct ibv_flow_spec_mpls);
1584                         break;
1585 #else
1586                 case RTE_FLOW_ITEM_TYPE_GRE:
1587                         size += sizeof(struct ibv_flow_spec_tunnel);
1588                         break;
1589 #endif
1590                 default:
1591                         break;
1592                 }
1593         }
1594         return size;
1595 }
1596
1597 /**
1598  * Internal preparation function. Allocate mlx5_flow with the required size.
1599  * The required size is calculate based on the actions and items. This function
1600  * also returns the detected actions and items for later use.
1601  *
1602  * @param[in] dev
1603  *   Pointer to Ethernet device.
1604  * @param[in] attr
1605  *   Pointer to the flow attributes.
1606  * @param[in] items
1607  *   Pointer to the list of items.
1608  * @param[in] actions
1609  *   Pointer to the list of actions.
1610  * @param[out] error
1611  *   Pointer to the error structure.
1612  *
1613  * @return
1614  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
1615  *   is set.
1616  */
1617 static struct mlx5_flow *
1618 flow_verbs_prepare(struct rte_eth_dev *dev,
1619                    const struct rte_flow_attr *attr __rte_unused,
1620                    const struct rte_flow_item items[],
1621                    const struct rte_flow_action actions[],
1622                    struct rte_flow_error *error)
1623 {
1624         size_t size = 0;
1625         uint32_t handle_idx = 0;
1626         struct mlx5_flow *dev_flow;
1627         struct mlx5_flow_handle *dev_handle;
1628         struct mlx5_priv *priv = dev->data->dev_private;
1629
1630         size += flow_verbs_get_actions_size(actions);
1631         size += flow_verbs_get_items_size(items);
1632         if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1633                 rte_flow_error_set(error, E2BIG,
1634                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1635                                    "Verbs spec/action size too large");
1636                 return NULL;
1637         }
1638         /* In case of corrupting the memory. */
1639         if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1640                 rte_flow_error_set(error, ENOSPC,
1641                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1642                                    "not free temporary device flow");
1643                 return NULL;
1644         }
1645         dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1646                                    &handle_idx);
1647         if (!dev_handle) {
1648                 rte_flow_error_set(error, ENOMEM,
1649                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1650                                    "not enough memory to create flow handle");
1651                 return NULL;
1652         }
1653         /* No multi-thread supporting. */
1654         dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1655         dev_flow->handle = dev_handle;
1656         dev_flow->handle_idx = handle_idx;
1657         /* Memcpy is used, only size needs to be cleared to 0. */
1658         dev_flow->verbs.size = 0;
1659         dev_flow->verbs.attr.num_of_specs = 0;
1660         dev_flow->ingress = attr->ingress;
1661         dev_flow->hash_fields = 0;
1662         /* Need to set transfer attribute: not supported in Verbs mode. */
1663         return dev_flow;
1664 }
1665
1666 /**
1667  * Fill the flow with verb spec.
1668  *
1669  * @param[in] dev
1670  *   Pointer to Ethernet device.
1671  * @param[in, out] dev_flow
1672  *   Pointer to the mlx5 flow.
1673  * @param[in] attr
1674  *   Pointer to the flow attributes.
1675  * @param[in] items
1676  *   Pointer to the list of items.
1677  * @param[in] actions
1678  *   Pointer to the list of actions.
1679  * @param[out] error
1680  *   Pointer to the error structure.
1681  *
1682  * @return
1683  *   0 on success, else a negative errno value otherwise and rte_errno is set.
1684  */
1685 static int
1686 flow_verbs_translate(struct rte_eth_dev *dev,
1687                      struct mlx5_flow *dev_flow,
1688                      const struct rte_flow_attr *attr,
1689                      const struct rte_flow_item items[],
1690                      const struct rte_flow_action actions[],
1691                      struct rte_flow_error *error)
1692 {
1693         uint64_t item_flags = 0;
1694         uint64_t action_flags = 0;
1695         uint64_t priority = attr->priority;
1696         uint32_t subpriority = 0;
1697         struct mlx5_priv *priv = dev->data->dev_private;
1698         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1699                                               priv->rss_desc)
1700                                               [!!priv->flow_nested_idx];
1701         char errstr[32];
1702
1703         if (priority == MLX5_FLOW_PRIO_RSVD)
1704                 priority = priv->config.flow_prio - 1;
1705         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1706                 int ret;
1707
1708                 switch (actions->type) {
1709                 case RTE_FLOW_ACTION_TYPE_VOID:
1710                         break;
1711                 case RTE_FLOW_ACTION_TYPE_FLAG:
1712                         flow_verbs_translate_action_flag(dev_flow, actions);
1713                         action_flags |= MLX5_FLOW_ACTION_FLAG;
1714                         dev_flow->handle->mark = 1;
1715                         break;
1716                 case RTE_FLOW_ACTION_TYPE_MARK:
1717                         flow_verbs_translate_action_mark(dev_flow, actions);
1718                         action_flags |= MLX5_FLOW_ACTION_MARK;
1719                         dev_flow->handle->mark = 1;
1720                         break;
1721                 case RTE_FLOW_ACTION_TYPE_DROP:
1722                         flow_verbs_translate_action_drop(dev_flow, actions);
1723                         action_flags |= MLX5_FLOW_ACTION_DROP;
1724                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
1725                         break;
1726                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1727                         flow_verbs_translate_action_queue(rss_desc, actions);
1728                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
1729                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1730                         break;
1731                 case RTE_FLOW_ACTION_TYPE_RSS:
1732                         flow_verbs_translate_action_rss(rss_desc, actions);
1733                         action_flags |= MLX5_FLOW_ACTION_RSS;
1734                         dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1735                         break;
1736                 case RTE_FLOW_ACTION_TYPE_COUNT:
1737                         ret = flow_verbs_translate_action_count(dev_flow,
1738                                                                 actions,
1739                                                                 dev, error);
1740                         if (ret < 0)
1741                                 return ret;
1742                         action_flags |= MLX5_FLOW_ACTION_COUNT;
1743                         break;
1744                 default:
1745                         return rte_flow_error_set(error, ENOTSUP,
1746                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1747                                                   actions,
1748                                                   "action not supported");
1749                 }
1750         }
1751         dev_flow->act_flags = action_flags;
1752         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1753                 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1754
1755                 switch (items->type) {
1756                 case RTE_FLOW_ITEM_TYPE_VOID:
1757                         break;
1758                 case RTE_FLOW_ITEM_TYPE_ETH:
1759                         flow_verbs_translate_item_eth(dev_flow, items,
1760                                                       item_flags);
1761                         subpriority = MLX5_PRIORITY_MAP_L2;
1762                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1763                                                MLX5_FLOW_LAYER_OUTER_L2;
1764                         break;
1765                 case RTE_FLOW_ITEM_TYPE_VLAN:
1766                         flow_verbs_translate_item_vlan(dev_flow, items,
1767                                                        item_flags);
1768                         subpriority = MLX5_PRIORITY_MAP_L2;
1769                         item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1770                                                 MLX5_FLOW_LAYER_INNER_VLAN) :
1771                                                (MLX5_FLOW_LAYER_OUTER_L2 |
1772                                                 MLX5_FLOW_LAYER_OUTER_VLAN);
1773                         break;
1774                 case RTE_FLOW_ITEM_TYPE_IPV4:
1775                         flow_verbs_translate_item_ipv4(dev_flow, items,
1776                                                        item_flags);
1777                         subpriority = MLX5_PRIORITY_MAP_L3;
1778                         dev_flow->hash_fields |=
1779                                 mlx5_flow_hashfields_adjust
1780                                         (rss_desc, tunnel,
1781                                          MLX5_IPV4_LAYER_TYPES,
1782                                          MLX5_IPV4_IBV_RX_HASH);
1783                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1784                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1785                         break;
1786                 case RTE_FLOW_ITEM_TYPE_IPV6:
1787                         flow_verbs_translate_item_ipv6(dev_flow, items,
1788                                                        item_flags);
1789                         subpriority = MLX5_PRIORITY_MAP_L3;
1790                         dev_flow->hash_fields |=
1791                                 mlx5_flow_hashfields_adjust
1792                                         (rss_desc, tunnel,
1793                                          MLX5_IPV6_LAYER_TYPES,
1794                                          MLX5_IPV6_IBV_RX_HASH);
1795                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1796                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1797                         break;
1798                 case RTE_FLOW_ITEM_TYPE_TCP:
1799                         flow_verbs_translate_item_tcp(dev_flow, items,
1800                                                       item_flags);
1801                         subpriority = MLX5_PRIORITY_MAP_L4;
1802                         dev_flow->hash_fields |=
1803                                 mlx5_flow_hashfields_adjust
1804                                         (rss_desc, tunnel, ETH_RSS_TCP,
1805                                          (IBV_RX_HASH_SRC_PORT_TCP |
1806                                           IBV_RX_HASH_DST_PORT_TCP));
1807                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1808                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
1809                         break;
1810                 case RTE_FLOW_ITEM_TYPE_UDP:
1811                         flow_verbs_translate_item_udp(dev_flow, items,
1812                                                       item_flags);
1813                         subpriority = MLX5_PRIORITY_MAP_L4;
1814                         dev_flow->hash_fields |=
1815                                 mlx5_flow_hashfields_adjust
1816                                         (rss_desc, tunnel, ETH_RSS_UDP,
1817                                          (IBV_RX_HASH_SRC_PORT_UDP |
1818                                           IBV_RX_HASH_DST_PORT_UDP));
1819                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1820                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
1821                         break;
1822                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1823                         flow_verbs_translate_item_vxlan(dev_flow, items,
1824                                                         item_flags);
1825                         subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1826                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
1827                         break;
1828                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1829                         flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1830                                                             item_flags);
1831                         subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1832                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1833                         break;
1834                 case RTE_FLOW_ITEM_TYPE_GRE:
1835                         flow_verbs_translate_item_gre(dev_flow, items,
1836                                                       item_flags);
1837                         subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1838                         item_flags |= MLX5_FLOW_LAYER_GRE;
1839                         break;
1840                 case RTE_FLOW_ITEM_TYPE_MPLS:
1841                         flow_verbs_translate_item_mpls(dev_flow, items,
1842                                                        item_flags);
1843                         subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1844                         item_flags |= MLX5_FLOW_LAYER_MPLS;
1845                         break;
1846                 default:
1847                         snprintf(errstr, sizeof(errstr), "item type %d not supported",
1848                                  items->type);
1849                         return rte_flow_error_set(error, ENOTSUP,
1850                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1851                                                   NULL, errstr);
1852                 }
1853         }
1854         dev_flow->handle->layers = item_flags;
1855         /* Other members of attr will be ignored. */
1856         dev_flow->verbs.attr.priority =
1857                 mlx5_flow_adjust_priority(dev, priority, subpriority);
1858         dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
1859         return 0;
1860 }
1861
1862 /**
1863  * Remove the flow from the NIC but keeps it in memory.
1864  *
1865  * @param[in] dev
1866  *   Pointer to the Ethernet device structure.
1867  * @param[in, out] flow
1868  *   Pointer to flow structure.
1869  */
1870 static void
1871 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1872 {
1873         struct mlx5_priv *priv = dev->data->dev_private;
1874         struct mlx5_flow_handle *handle;
1875         uint32_t handle_idx;
1876
1877         if (!flow)
1878                 return;
1879         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1880                        handle_idx, handle, next) {
1881                 if (handle->drv_flow) {
1882                         claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
1883                         handle->drv_flow = NULL;
1884                 }
1885                 /* hrxq is union, don't touch it only the flag is set. */
1886                 if (handle->rix_hrxq) {
1887                         if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1888                                 mlx5_drop_action_destroy(dev);
1889                                 handle->rix_hrxq = 0;
1890                         } else if (handle->fate_action ==
1891                                    MLX5_FLOW_FATE_QUEUE) {
1892                                 mlx5_hrxq_release(dev, handle->rix_hrxq);
1893                                 handle->rix_hrxq = 0;
1894                         }
1895                 }
1896                 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1897                         mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1898         }
1899 }
1900
1901 /**
1902  * Remove the flow from the NIC and the memory.
1903  *
1904  * @param[in] dev
1905  *   Pointer to the Ethernet device structure.
1906  * @param[in, out] flow
1907  *   Pointer to flow structure.
1908  */
1909 static void
1910 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1911 {
1912         struct mlx5_priv *priv = dev->data->dev_private;
1913         struct mlx5_flow_handle *handle;
1914
1915         if (!flow)
1916                 return;
1917         flow_verbs_remove(dev, flow);
1918         while (flow->dev_handles) {
1919                 uint32_t tmp_idx = flow->dev_handles;
1920
1921                 handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1922                                    tmp_idx);
1923                 if (!handle)
1924                         return;
1925                 flow->dev_handles = handle->next.next;
1926                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1927                            tmp_idx);
1928         }
1929         if (flow->counter) {
1930                 flow_verbs_counter_release(dev, flow->counter);
1931                 flow->counter = 0;
1932         }
1933 }
1934
/**
 * Apply the flow to the NIC.
 *
 * For every temporary device flow produced by the translate step, acquire the
 * fate resource (drop action or hash Rx queue) and create the Verbs flow rule.
 * On any failure, every resource acquired so far for this rte_flow is released
 * before returning.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                 struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_handle *handle;
        struct mlx5_flow *dev_flow;
        struct mlx5_hrxq *hrxq;
        uint32_t dev_handles;
        int err;
        int idx;

        /*
         * Walk the temporary device flows of this rte_flow, newest first.
         * flow_nested_idx marks the start of the current (possibly nested)
         * flow's entries in the shared inter_flows scratch array.
         */
        for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
                dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
                handle = dev_flow->handle;
                if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
                        hrxq = mlx5_drop_action_create(dev);
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, errno,
                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                         "cannot get drop hash queue");
                                goto error;
                        }
                } else {
                        uint32_t hrxq_idx;
                        struct mlx5_flow_rss_desc *rss_desc =
                                &((struct mlx5_flow_rss_desc *)priv->rss_desc)
                                [!!priv->flow_nested_idx];

                        MLX5_ASSERT(rss_desc->queue_num);
                        /* Reuse a matching hash Rx queue, create otherwise. */
                        hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
                                                 MLX5_RSS_HASH_KEY_LEN,
                                                 dev_flow->hash_fields,
                                                 rss_desc->queue,
                                                 rss_desc->queue_num);
                        if (!hrxq_idx)
                                hrxq_idx = mlx5_hrxq_new
                                                (dev, rss_desc->key,
                                                 MLX5_RSS_HASH_KEY_LEN,
                                                 dev_flow->hash_fields,
                                                 rss_desc->queue,
                                                 rss_desc->queue_num,
                                                 !!(handle->layers &
                                                 MLX5_FLOW_LAYER_TUNNEL),
                                                 false);
                        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                                              hrxq_idx);
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, rte_errno,
                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                         "cannot get hash queue");
                                goto error;
                        }
                        handle->rix_hrxq = hrxq_idx;
                }
                MLX5_ASSERT(hrxq);
                handle->drv_flow = mlx5_glue->create_flow
                                        (hrxq->qp, &dev_flow->verbs.attr);
                if (!handle->drv_flow) {
                        rte_flow_error_set(error, errno,
                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                           NULL,
                                           "hardware refuses to create flow");
                        goto error;
                }
                if (priv->vmwa_context &&
                    handle->vf_vlan.tag && !handle->vf_vlan.created) {
                        /*
                         * The rule contains the VLAN pattern.
                         * For VF we are going to create VLAN
                         * interface to make hypervisor set correct
                         * e-Switch vport context.
                         */
                        mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
                }
        }
        return 0;
error:
        err = rte_errno; /* Save rte_errno before cleanup. */
        /* Roll back: release fate resources on every handle of this flow. */
        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       dev_handles, handle, next) {
                /* hrxq is union, don't touch it only the flag is set. */
                if (handle->rix_hrxq) {
                        if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
                                mlx5_drop_action_destroy(dev);
                                handle->rix_hrxq = 0;
                        } else if (handle->fate_action ==
                                   MLX5_FLOW_FATE_QUEUE) {
                                mlx5_hrxq_release(dev, handle->rix_hrxq);
                                handle->rix_hrxq = 0;
                        }
                }
                if (handle->vf_vlan.tag && handle->vf_vlan.created)
                        mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
        }
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
}
2048
2049 /**
2050  * Query a flow.
2051  *
2052  * @see rte_flow_query()
2053  * @see rte_flow_ops
2054  */
2055 static int
2056 flow_verbs_query(struct rte_eth_dev *dev,
2057                  struct rte_flow *flow,
2058                  const struct rte_flow_action *actions,
2059                  void *data,
2060                  struct rte_flow_error *error)
2061 {
2062         int ret = -EINVAL;
2063
2064         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2065                 switch (actions->type) {
2066                 case RTE_FLOW_ACTION_TYPE_VOID:
2067                         break;
2068                 case RTE_FLOW_ACTION_TYPE_COUNT:
2069                         ret = flow_verbs_counter_query(dev, flow, data, error);
2070                         break;
2071                 default:
2072                         return rte_flow_error_set(error, ENOTSUP,
2073                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2074                                                   actions,
2075                                                   "action not supported");
2076                 }
2077         }
2078         return ret;
2079 }
2080
/**
 * Synchronize flow rules to hardware.
 *
 * The Verbs engine keeps no cached steering objects, so there is nothing
 * to flush and the callback always succeeds.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure (unused).
 * @param[in] domains
 *   Bitmask of steering domains to synchronize (unused).
 * @param[in] flags
 *   Synchronization flags (unused).
 *
 * @return
 *   Always 0.
 */
static int
flow_verbs_sync_domain(struct rte_eth_dev *dev, uint32_t domains,
		       uint32_t flags)
{
	(void)dev;
	(void)domains;
	(void)flags;
	return 0;
}
2091
/* Verbs engine implementation of the mlx5 flow driver callbacks. */
const struct mlx5_flow_verbs_drv_ops mlx5_flow_verbs_drv_ops = {
        .validate = flow_verbs_validate,
        .prepare = flow_verbs_prepare,
        .translate = flow_verbs_translate,
        .apply = flow_verbs_apply,
        .remove = flow_verbs_remove,
        .destroy = flow_verbs_destroy,
        .query = flow_verbs_query,
        /* No cached HW steering objects in Verbs mode: sync is a no-op. */
        .sync_domain = flow_verbs_sync_domain,
};