1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 Mellanox Technologies, Ltd
5 #include <netinet/in.h>
11 #include <rte_common.h>
12 #include <rte_ether.h>
13 #include <rte_ethdev_driver.h>
15 #include <rte_flow_driver.h>
16 #include <rte_malloc.h>
19 #include <mlx5_glue.h>
21 #include <mlx5_malloc.h>
23 #include "mlx5_defs.h"
25 #include "mlx5_flow.h"
26 #include "mlx5_rxtx.h"
/* Evaluates to IBV_FLOW_SPEC_INNER when any MLX5_FLOW_LAYER_TUNNEL bit is set in item_flags, otherwise to 0. */
28 #define VERBS_SPEC_INNER(item_flags) \
29 (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
31 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
/*
 * Read as priority_map_3[priority][subpriority] by mlx5_flow_adjust_priority().
 * Three rows are visible; rows 0 and 1 both contain Verbs priority 2.
 * NOTE(review): the end of the initializer is not shown in this listing.
 */
32 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
33 { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
36 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
/*
 * Read as priority_map_5[priority][subpriority] by mlx5_flow_adjust_priority().
 * Five rows are visible, covering Verbs priorities 0 through 14.
 * NOTE(review): the end of the initializer is not shown in this listing.
 */
37 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
38 { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
39 { 9, 10, 11 }, { 12, 13, 14 },
43 * Discover the maximum number of priority available.
46 * Pointer to the Ethernet device structure.
49 * number of supported flow priority on success, a negative errno
50 * value otherwise and rte_errno is set.
/*
 * Probes the device with a throwaway flow (Ethernet spec plus drop action)
 * created on the drop queue at priority count - 1 for each candidate count in
 * vprio (8, then 16); every flow that is created is destroyed again. The
 * value logged as the flow maximum priority is the row count of
 * priority_map_3 or priority_map_5.
 * NOTE(review): this listing elides lines (return type, braces and the
 * conditions around the statements below), so which branch selects which map
 * and how failures are reported cannot be confirmed from here.
 */
53 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
55 struct mlx5_priv *priv = dev->data->dev_private;
57 struct ibv_flow_attr attr;
58 struct ibv_flow_spec_eth eth;
59 struct ibv_flow_spec_action_drop drop;
63 .port = (uint8_t)priv->dev_port,
66 .type = IBV_FLOW_SPEC_ETH,
67 .size = sizeof(struct ibv_flow_spec_eth),
70 .size = sizeof(struct ibv_flow_spec_action_drop),
71 .type = IBV_FLOW_SPEC_ACTION_DROP,
74 struct ibv_flow *flow;
/* The drop action supplies the queue pair the probe flows are attached to. */
75 struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);
/* Candidate numbers of Verbs priorities to probe. */
76 uint16_t vprio[] = { 8, 16 };
84 for (i = 0; i != RTE_DIM(vprio); i++) {
/* Probe the highest index of the candidate range. */
85 flow_attr.attr.priority = vprio[i] - 1;
86 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
89 claim_zero(mlx5_glue->destroy_flow(flow));
92 mlx5_drop_action_destroy(dev);
/* RTE_DIM of a map is its row count, i.e. the number of flow priorities. */
95 priority = RTE_DIM(priority_map_3);
98 priority = RTE_DIM(priority_map_5);
103 "port %u verbs maximum priority: %d expected 8/16",
104 dev->data->port_id, priority);
107 DRV_LOG(INFO, "port %u flow maximum priority: %d",
108 dev->data->port_id, priority);
113 * Adjust flow priority based on the highest layer and the request priority.
116 * Pointer to the Ethernet device structure.
117 * @param[in] priority
118 * The rule base priority.
119 * @param[in] subpriority
120 * The priority based on the items.
/*
 * Selects the lookup table whose row count equals priv->config.flow_prio and
 * reads res from table[priority][subpriority].
 * NOTE(review): no range check on priority or subpriority is visible in the
 * lines shown, and priority is a signed int32_t used as an array index; lines
 * are elided from this listing, so confirm against the full source.
 */
126 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
127 uint32_t subpriority)
130 struct mlx5_priv *priv = dev->data->dev_private;
132 switch (priv->config.flow_prio) {
/* Case labels are the row counts of the two priority maps. */
133 case RTE_DIM(priority_map_3):
134 res = priority_map_3[priority][subpriority];
136 case RTE_DIM(priority_map_5):
137 res = priority_map_5[priority][subpriority];
144 * Get Verbs flow counter by index.
147 * Pointer to the Ethernet device structure.
149 * mlx5 flow counter index in the container.
151 * mlx5 flow counter pool in the container,
154 * A pointer to the counter, NULL otherwise.
/*
 * Resolves a counter index to the counter object stored in container 0 of
 * the shared context.
 * NOTE(review): the idx parameter line and the handling of ppool are elided
 * from this listing.
 */
156 static struct mlx5_flow_counter *
157 flow_verbs_counter_get_by_idx(struct rte_eth_dev *dev,
159 struct mlx5_flow_counter_pool **ppool)
161 struct mlx5_priv *priv = dev->data->dev_private;
162 struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0);
163 struct mlx5_flow_counter_pool *pool;
/*
 * Subtract one and mask with MLX5_CNT_SHARED_OFFSET - 1. Presumably this maps
 * a 1-based index to 0-based and drops the shared flag that
 * flow_verbs_counter_new() ORs in; assumes MLX5_CNT_SHARED_OFFSET is a power
 * of two - TODO confirm.
 */
165 idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
/* Quotient selects the pool, remainder selects the counter inside it. */
166 pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
170 return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
174 * Create Verbs flow counter with Verbs library.
177 * Pointer to the Ethernet device structure.
178 * @param[in, out] counter
179 * mlx5 flow counter object, contains the counter id,
180 * handle of created Verbs flow counter is returned
181 * in cs field (if counters are supported).
184 * 0 On success else a negative errno value is returned
185 * and rte_errno is set.
/*
 * Two build-time variants are visible. With
 * HAVE_IBV_DEVICE_COUNTERS_SET_V42 a counter set is created from counter->id.
 * With HAVE_IBV_DEVICE_COUNTERS_SET_V45 a counters object is created and the
 * packets and bytes descriptors are attached to it in that order.
 * NOTE(review): return statements, error checks and any fallback branch are
 * elided from this listing.
 */
188 flow_verbs_counter_create(struct rte_eth_dev *dev,
189 struct mlx5_flow_counter_ext *counter)
191 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
192 struct mlx5_priv *priv = dev->data->dev_private;
193 struct ibv_context *ctx = priv->sh->ctx;
194 struct ibv_counter_set_init_attr init = {
195 .counter_set_id = counter->id};
197 counter->cs = mlx5_glue->create_counter_set(ctx, &init);
203 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
204 struct mlx5_priv *priv = dev->data->dev_private;
205 struct ibv_context *ctx = priv->sh->ctx;
206 struct ibv_counters_init_attr init = {0};
207 struct ibv_counter_attach_attr attach;
210 memset(&attach, 0, sizeof(attach));
211 counter->cs = mlx5_glue->create_counters(ctx, &init);
/* The same attach descriptor is reused: first for packets, then for bytes. */
216 attach.counter_desc = IBV_COUNTER_PACKETS;
218 ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
220 attach.counter_desc = IBV_COUNTER_BYTES;
222 ret = mlx5_glue->attach_counters
223 (counter->cs, &attach, NULL);
/* Presumably the cleanup taken when attaching failed - condition not visible. */
226 claim_zero(mlx5_glue->destroy_counters(counter->cs));
241 * Get a flow counter.
244 * Pointer to the Ethernet device structure.
246 * Indicate if this counter is shared with other flows.
248 * Counter identifier.
251 * Index to the counter, 0 otherwise and rte_errno is set.
/*
 * Visible steps, in order: look up an existing shared counter by id and take
 * a reference; otherwise scan the valid pools for a free counter; if needed
 * grow the pool array and allocate a fresh pool; compute the counter index;
 * register shared counters in the id table; create the Verbs counter.
 * NOTE(review): many lines are elided from this listing (conditions, returns,
 * error paths), so the exact control flow between these steps must be
 * confirmed against the full source.
 */
254 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
256 struct mlx5_priv *priv = dev->data->dev_private;
257 struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0);
258 struct mlx5_flow_counter_pool *pool = NULL;
259 struct mlx5_flow_counter_ext *cnt_ext = NULL;
260 struct mlx5_flow_counter *cnt = NULL;
261 union mlx5_l3t_data data;
/* Snapshot of the number of valid pools, used for the scan and the append. */
262 uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
263 uint32_t pool_idx, cnt_idx;
/* Shared counters are first searched by id in priv->sh->cnt_id_tbl. */
267 if (shared && !mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) &&
269 cnt = flow_verbs_counter_get_by_idx(dev, data.dword, NULL);
/* Presumably guards against wrap-around of the reference count. */
270 if (cnt->shared_info.ref_cnt + 1 == 0) {
274 cnt->shared_info.ref_cnt++;
/* Look for a free counter at the head of counters[0] of each valid pool. */
277 for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
278 pool = cont->pools[pool_idx];
281 cnt = TAILQ_FIRST(&pool->counters[0]);
286 struct mlx5_flow_counter_pool **pools;
/* Pool array is full: grow it by MLX5_CNT_CONTAINER_RESIZE entries. */
289 if (n_valid == cont->n) {
290 /* Resize the container pool array. */
291 size = sizeof(struct mlx5_flow_counter_pool *) *
292 (n_valid + MLX5_CNT_CONTAINER_RESIZE);
293 pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
298 memcpy(pools, cont->pools,
299 sizeof(struct mlx5_flow_counter_pool *) *
301 mlx5_free(cont->pools);
304 cont->n += MLX5_CNT_CONTAINER_RESIZE;
306 /* Allocate memory for new pool*/
/*
 * One zeroed allocation sized for the pool header plus
 * MLX5_COUNTERS_PER_POOL counter and counter-extension structs.
 */
307 size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
308 MLX5_COUNTERS_PER_POOL;
309 pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
312 pool->type |= CNT_POOL_TYPE_EXT;
/* Every counter of the new pool is pushed onto the counters[0] list. */
313 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
314 cnt = MLX5_POOL_GET_CNT(pool, i);
315 TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
317 cnt = MLX5_POOL_GET_CNT(pool, 0);
/* The new pool is stored at slot n_valid before n_valid is incremented. */
318 cont->pools[n_valid] = pool;
320 rte_atomic16_add(&cont->n_valid, 1);
321 TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
/* Build the counter index from the pool index and the slot in the pool. */
323 i = MLX5_CNT_ARRAY_IDX(pool, cnt);
324 cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
/*
 * Shared counter bookkeeping: the index is stored in the id table before the
 * shared flag is ORed into cnt_idx.
 */
326 data.dword = cnt_idx;
327 if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data))
329 cnt->shared_info.ref_cnt = 1;
330 cnt->shared_info.id = id;
331 cnt_idx |= MLX5_CNT_SHARED_OFFSET;
333 cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
336 /* Create counter with Verbs. */
337 ret = flow_verbs_counter_create(dev, cnt_ext);
/* The counter is taken off the counters[0] list; guarding condition not visible. */
339 TAILQ_REMOVE(&pool->counters[0], cnt, next);
342 /* Some error occurred in Verbs library. */
348 * Release a flow counter.
351 * Pointer to the Ethernet device structure.
353 * Index to the counter handler.
/*
 * Drops one reference of a shared counter, clears its id table entry,
 * destroys the Verbs counter object and puts the counter back at the head of
 * the counters[0] list of its pool.
 * NOTE(review): the statement executed when the decremented reference count
 * is still non-zero is elided from this listing (presumably an early return).
 */
356 flow_verbs_counter_release(struct rte_eth_dev *dev, uint32_t counter)
358 struct mlx5_priv *priv = dev->data->dev_private;
359 struct mlx5_flow_counter_pool *pool;
360 struct mlx5_flow_counter *cnt;
361 struct mlx5_flow_counter_ext *cnt_ext;
363 cnt = flow_verbs_counter_get_by_idx(dev, counter, &pool);
364 if (IS_SHARED_CNT(counter)) {
365 if (--cnt->shared_info.ref_cnt)
367 mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl,
368 cnt->shared_info.id);
370 cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
/* The destroy call matches the API variant used at creation time. */
371 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
372 claim_zero(mlx5_glue->destroy_counter_set(cnt_ext->cs));
374 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
375 claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
379 TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
383 * Query a flow counter via Verbs library call.
385 * @see rte_flow_query()
/*
 * Reads two 64-bit values (hits, bytes) for the counter of the flow and
 * reports them relative to the baseline stored in the counter object.
 * Three error messages are visible: read failure, flow without counter
 * (EINVAL) and counters not available in this build (ENOTSUP).
 * NOTE(review): dev is marked __rte_unused but is read below when a counters
 * API is compiled in. Conditions and returns are elided from this listing.
 */
389 flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
390 struct rte_flow *flow, void *data,
391 struct rte_flow_error *error)
393 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
394 defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
396 struct mlx5_flow_counter_pool *pool;
397 struct mlx5_flow_counter *cnt = flow_verbs_counter_get_by_idx
398 (dev, flow->counter, &pool);
399 struct mlx5_flow_counter_ext *cnt_ext = MLX5_CNT_TO_CNT_EXT
401 struct rte_flow_query_count *qc = data;
/* counters[0] receives hits and counters[1] receives bytes (see below). */
402 uint64_t counters[2] = {0, 0};
403 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
404 struct ibv_query_counter_set_attr query_cs_attr = {
406 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
408 struct ibv_counter_set_data query_out = {
410 .outlen = 2 * sizeof(uint64_t),
412 int err = mlx5_glue->query_counter_set(&query_cs_attr,
414 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
415 int err = mlx5_glue->query_counters
416 (cnt_ext->cs, counters,
418 IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
421 return rte_flow_error_set
423 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
425 "cannot read counter");
/* Report the difference between the raw reading and the stored baseline. */
428 qc->hits = counters[0] - cnt->hits;
429 qc->bytes = counters[1] - cnt->bytes;
/* Baseline update; the guarding condition, if any, is not visible here. */
431 cnt->hits = counters[0];
432 cnt->bytes = counters[1];
436 return rte_flow_error_set(error, EINVAL,
437 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
439 "flow does not have counter");
443 return rte_flow_error_set(error, ENOTSUP,
444 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
446 "counters are not available");
451 * Add a verbs item specification into @p verbs.
454 * Pointer to verbs structure.
456 * Create specification.
458 * Size in bytes of the specification to copy.
/*
 * Copies size bytes from src to offset verbs->size inside verbs->specs and
 * increments the spec counter of the flow attribute.
 * NOTE(review): no bounds check and no update of verbs->size are visible in
 * the lines shown; lines are elided from this listing, so confirm against the
 * full source.
 */
461 flow_verbs_spec_add(struct mlx5_flow_verbs_workspace *verbs,
462 void *src, unsigned int size)
468 MLX5_ASSERT(verbs->specs);
469 dst = (void *)(verbs->specs + verbs->size);
470 memcpy(dst, src, size);
471 ++verbs->attr.num_of_specs;
476 * Convert the @p item into a Verbs specification. This function assumes that
477 * the input is valid and that there is space to insert the requested item
480 * @param[in, out] dev_flow
481 * Pointer to dev_flow structure.
483 * Item specification.
484 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_eth from the Ethernet item and appends it to
 * dev_flow->verbs. Destination MAC, source MAC and EtherType are copied from
 * the item spec and mask, then each value is ANDed with its mask. The spec
 * type carries the inner flag when item_flags has a tunnel layer set.
 * NOTE(review): lines are elided from this listing (return type, braces, the
 * conditions guarding the default mask and the spec copy, the memcpy length
 * arguments); confirm against the full source.
 */
488 flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
489 const struct rte_flow_item *item,
492 const struct rte_flow_item_eth *spec = item->spec;
493 const struct rte_flow_item_eth *mask = item->mask;
494 const unsigned int size = sizeof(struct ibv_flow_spec_eth);
495 struct ibv_flow_spec_eth eth = {
496 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
/* Fall back to the default Ethernet item mask. */
501 mask = &rte_flow_item_eth_mask;
505 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
507 memcpy(&eth.val.src_mac, spec->src.addr_bytes,
509 eth.val.ether_type = spec->type;
510 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
512 memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
514 eth.mask.ether_type = mask->type;
515 /* Remove unwanted bits from values. */
516 for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
517 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
518 eth.val.src_mac[i] &= eth.mask.src_mac[i];
520 eth.val.ether_type &= eth.mask.ether_type;
522 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
526 * Update the VLAN tag in the Verbs Ethernet specification.
527 * This function assumes that the input is valid and there is space to add
528 * the requested item.
530 * @param[in, out] attr
531 * Pointer to Verbs attributes structure.
533 * Verbs structure containing the VLAN information to copy.
/*
 * Walks the specs stored directly after the ibv_flow_attr header and, for a
 * spec whose type equals eth->type, overwrites its VLAN tag and EtherType
 * value and mask with those from eth.
 * NOTE(review): whether the walk stops at the first match is not visible;
 * lines are elided from this listing.
 */
536 flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
537 struct ibv_flow_spec_eth *eth)
540 const enum ibv_flow_spec_type search = eth->type;
/* First spec header sits immediately behind the attribute structure. */
541 struct ibv_spec_header *hdr = (struct ibv_spec_header *)
542 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
544 for (i = 0; i != attr->num_of_specs; ++i) {
545 if (hdr->type == search) {
546 struct ibv_flow_spec_eth *e =
547 (struct ibv_flow_spec_eth *)hdr;
549 e->val.vlan_tag = eth->val.vlan_tag;
550 e->mask.vlan_tag = eth->mask.vlan_tag;
551 e->val.ether_type = eth->val.ether_type;
552 e->mask.ether_type = eth->mask.ether_type;
/* Specs are variable sized: advance by the size recorded in the header. */
555 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
560 * Convert the @p item into a Verbs specification. This function assumes that
561 * the input is valid and that there is space to insert the requested item
564 * @param[in, out] dev_flow
565 * Pointer to dev_flow structure.
567 * Item specification.
568 * @param[in] item_flags
/*
 * Translates a VLAN item into an Ethernet Verbs spec carrying the masked TCI
 * and inner EtherType. When no L2 layer of the current level (inner for
 * tunnels, outer otherwise) is present in item_flags a new spec is appended;
 * the other visible call merges the VLAN fields into an existing Ethernet
 * spec. The 12 low bits of the host-order TCI are stored as the VF VLAN tag.
 * NOTE(review): lines are elided from this listing (return type, braces, the
 * else keyword and the conditions guarding the default mask, the spec copy
 * and the vf_vlan store); confirm against the full source.
 */
572 flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
573 const struct rte_flow_item *item,
576 const struct rte_flow_item_vlan *spec = item->spec;
577 const struct rte_flow_item_vlan *mask = item->mask;
578 unsigned int size = sizeof(struct ibv_flow_spec_eth);
579 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
580 struct ibv_flow_spec_eth eth = {
581 .type = IBV_FLOW_SPEC_ETH | VERBS_SPEC_INNER(item_flags),
/* L2 layer flag of the level this VLAN item belongs to. */
584 const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
585 MLX5_FLOW_LAYER_OUTER_L2;
/* Fall back to the default VLAN item mask. */
588 mask = &rte_flow_item_vlan_mask;
590 eth.val.vlan_tag = spec->tci;
591 eth.mask.vlan_tag = mask->tci;
592 eth.val.vlan_tag &= eth.mask.vlan_tag;
593 eth.val.ether_type = spec->inner_type;
594 eth.mask.ether_type = mask->inner_type;
595 eth.val.ether_type &= eth.mask.ether_type;
597 if (!(item_flags & l2m))
598 flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
600 flow_verbs_item_vlan_update(&dev_flow->verbs.attr, &eth);
602 dev_flow->handle->vf_vlan.tag =
603 rte_be_to_cpu_16(spec->tci) & 0x0fff;
607 * Convert the @p item into a Verbs specification. This function assumes that
608 * the input is valid and that there is space to insert the requested item
611 * @param[in, out] dev_flow
612 * Pointer to dev_flow structure.
614 * Item specification.
615 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_ipv4_ext from the IPv4 item: source and destination
 * address, next protocol and type of service are copied for both value and
 * mask, each value is ANDed with its mask, and the spec is appended to
 * dev_flow->verbs.
 * NOTE(review): the conditions guarding the default mask and the spec copy
 * are elided from this listing.
 */
619 flow_verbs_translate_item_ipv4(struct mlx5_flow *dev_flow,
620 const struct rte_flow_item *item,
623 const struct rte_flow_item_ipv4 *spec = item->spec;
624 const struct rte_flow_item_ipv4 *mask = item->mask;
625 unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
626 struct ibv_flow_spec_ipv4_ext ipv4 = {
627 .type = IBV_FLOW_SPEC_IPV4_EXT | VERBS_SPEC_INNER(item_flags),
632 mask = &rte_flow_item_ipv4_mask;
/* Fields are copied as stored in the item, with no byte-order conversion. */
634 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
635 .src_ip = spec->hdr.src_addr,
636 .dst_ip = spec->hdr.dst_addr,
637 .proto = spec->hdr.next_proto_id,
638 .tos = spec->hdr.type_of_service,
640 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
641 .src_ip = mask->hdr.src_addr,
642 .dst_ip = mask->hdr.dst_addr,
643 .proto = mask->hdr.next_proto_id,
644 .tos = mask->hdr.type_of_service,
646 /* Remove unwanted bits from values. */
647 ipv4.val.src_ip &= ipv4.mask.src_ip;
648 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
649 ipv4.val.proto &= ipv4.mask.proto;
650 ipv4.val.tos &= ipv4.mask.tos;
652 flow_verbs_spec_add(&dev_flow->verbs, &ipv4, size);
656 * Convert the @p item into a Verbs specification. This function assumes that
657 * the input is valid and that there is space to insert the requested item
660 * @param[in, out] dev_flow
661 * Pointer to dev_flow structure.
663 * Item specification.
664 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_ipv6 from the IPv6 item: addresses and next header
 * are copied, flow label and traffic class are extracted from the vtc_flow
 * word, each value is ANDed with its mask, and the spec is appended to
 * dev_flow->verbs.
 * NOTE(review): the conditions guarding the default mask and the spec copy
 * are elided from this listing.
 */
668 flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
669 const struct rte_flow_item *item,
672 const struct rte_flow_item_ipv6 *spec = item->spec;
673 const struct rte_flow_item_ipv6 *mask = item->mask;
674 unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
675 struct ibv_flow_spec_ipv6 ipv6 = {
676 .type = IBV_FLOW_SPEC_IPV6 | VERBS_SPEC_INNER(item_flags),
681 mask = &rte_flow_item_ipv6_mask;
684 uint32_t vtc_flow_val;
685 uint32_t vtc_flow_mask;
/*
 * RTE_DIM is an element count used here as a byte count; assumes the address
 * arrays have one-byte elements - TODO confirm.
 */
687 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
688 RTE_DIM(ipv6.val.src_ip));
689 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
690 RTE_DIM(ipv6.val.dst_ip));
691 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
692 RTE_DIM(ipv6.mask.src_ip));
693 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
694 RTE_DIM(ipv6.mask.dst_ip));
/* vtc_flow is converted to CPU order before its bit fields are extracted. */
695 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
696 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
/* The flow label is stored back in big-endian order. */
697 ipv6.val.flow_label =
698 rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
699 RTE_IPV6_HDR_FL_SHIFT);
700 ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
701 RTE_IPV6_HDR_TC_SHIFT;
702 ipv6.val.next_hdr = spec->hdr.proto;
703 ipv6.mask.flow_label =
704 rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
705 RTE_IPV6_HDR_FL_SHIFT);
706 ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
707 RTE_IPV6_HDR_TC_SHIFT;
708 ipv6.mask.next_hdr = mask->hdr.proto;
709 /* Remove unwanted bits from values. */
710 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
711 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
712 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
714 ipv6.val.flow_label &= ipv6.mask.flow_label;
715 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
716 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
718 flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size);
722 * Convert the @p item into a Verbs specification. This function assumes that
723 * the input is valid and that there is space to insert the requested item
726 * @param[in, out] dev_flow
727 * Pointer to dev_flow structure.
729 * Item specification.
730 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_tcp_udp of type TCP from the item: source and
 * destination ports are copied for value and mask, the values are ANDed with
 * the masks, and the spec is appended to dev_flow->verbs.
 * NOTE(review): item_flags is marked __rte_unused yet is read by
 * VERBS_SPEC_INNER in the initializer below. The conditions guarding the
 * default mask and the spec copy are elided from this listing.
 */
734 flow_verbs_translate_item_tcp(struct mlx5_flow *dev_flow,
735 const struct rte_flow_item *item,
736 uint64_t item_flags __rte_unused)
738 const struct rte_flow_item_tcp *spec = item->spec;
739 const struct rte_flow_item_tcp *mask = item->mask;
740 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
741 struct ibv_flow_spec_tcp_udp tcp = {
742 .type = IBV_FLOW_SPEC_TCP | VERBS_SPEC_INNER(item_flags),
747 mask = &rte_flow_item_tcp_mask;
749 tcp.val.dst_port = spec->hdr.dst_port;
750 tcp.val.src_port = spec->hdr.src_port;
751 tcp.mask.dst_port = mask->hdr.dst_port;
752 tcp.mask.src_port = mask->hdr.src_port;
753 /* Remove unwanted bits from values. */
754 tcp.val.src_port &= tcp.mask.src_port;
755 tcp.val.dst_port &= tcp.mask.dst_port;
757 flow_verbs_spec_add(&dev_flow->verbs, &tcp, size);
761 * Convert the @p item into a Verbs specification. This function assumes that
762 * the input is valid and that there is space to insert the requested item
765 * @param[in, out] dev_flow
766 * Pointer to dev_flow structure.
768 * Item specification.
769 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_tcp_udp of type UDP from the item and appends it to
 * dev_flow->verbs. When the masked destination port is zero, the port is
 * derived from a tunnel item (VXLAN, VXLAN-GPE or MPLS) and matched exactly.
 * NOTE(review): item_flags is marked __rte_unused yet is read by
 * VERBS_SPEC_INNER in the initializer below. The statements that advance
 * item are elided from this listing, so which item the switch inspects must
 * be confirmed against the full source.
 */
773 flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow,
774 const struct rte_flow_item *item,
775 uint64_t item_flags __rte_unused)
777 const struct rte_flow_item_udp *spec = item->spec;
778 const struct rte_flow_item_udp *mask = item->mask;
779 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
780 struct ibv_flow_spec_tcp_udp udp = {
781 .type = IBV_FLOW_SPEC_UDP | VERBS_SPEC_INNER(item_flags),
786 mask = &rte_flow_item_udp_mask;
788 udp.val.dst_port = spec->hdr.dst_port;
789 udp.val.src_port = spec->hdr.src_port;
790 udp.mask.dst_port = mask->hdr.dst_port;
791 udp.mask.src_port = mask->hdr.src_port;
792 /* Remove unwanted bits from values. */
793 udp.val.src_port &= udp.mask.src_port;
794 udp.val.dst_port &= udp.mask.dst_port;
/* VOID items are passed over; the loop body is not visible here. */
797 while (item->type == RTE_FLOW_ITEM_TYPE_VOID)
/* Only fill in a tunnel port when no destination port is being matched. */
799 if (!(udp.val.dst_port & udp.mask.dst_port)) {
800 switch ((item)->type) {
801 case RTE_FLOW_ITEM_TYPE_VXLAN:
802 udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN);
803 udp.mask.dst_port = 0xffff;
805 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
806 udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE);
807 udp.mask.dst_port = 0xffff;
809 case RTE_FLOW_ITEM_TYPE_MPLS:
810 udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS);
811 udp.mask.dst_port = 0xffff;
818 flow_verbs_spec_add(&dev_flow->verbs, &udp, size);
822 * Convert the @p item into a Verbs specification. This function assumes that
823 * the input is valid and that there is space to insert the requested item
826 * @param[in, out] dev_flow
827 * Pointer to dev_flow structure.
829 * Item specification.
830 * @param[in] item_flags
/*
 * Builds an ibv_flow_spec_tunnel of type VXLAN from the item: the 3-byte VNI
 * of spec and mask is packed into the tunnel id, the value is ANDed with the
 * mask, and the spec is appended to dev_flow->verbs.
 * NOTE(review): the declaration of the id helper (apparently a union of vni
 * bytes and vlan_id) and the conditions guarding the default mask and the
 * spec copy are elided from this listing.
 */
834 flow_verbs_translate_item_vxlan(struct mlx5_flow *dev_flow,
835 const struct rte_flow_item *item,
836 uint64_t item_flags __rte_unused)
838 const struct rte_flow_item_vxlan *spec = item->spec;
839 const struct rte_flow_item_vxlan *mask = item->mask;
840 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
841 struct ibv_flow_spec_tunnel vxlan = {
842 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
848 } id = { .vlan_id = 0, };
851 mask = &rte_flow_item_vxlan_mask;
/* VNI bytes land in id.vni[1..3]; id.vni[0] keeps its zero initial value. */
853 memcpy(&id.vni[1], spec->vni, 3);
854 vxlan.val.tunnel_id = id.vlan_id;
855 memcpy(&id.vni[1], mask->vni, 3);
856 vxlan.mask.tunnel_id = id.vlan_id;
857 /* Remove unwanted bits from values. */
858 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
860 flow_verbs_spec_add(&dev_flow->verbs, &vxlan, size);
864 * Convert the @p item into a Verbs specification. This function assumes that
865 * the input is valid and that there is space to insert the requested item
868 * @param[in, out] dev_flow
869 * Pointer to dev_flow structure.
871 * Item specification.
872 * @param[in] item_flags
/*
 * Same translation as for plain VXLAN, applied to a VXLAN-GPE item: the spec
 * type used is IBV_FLOW_SPEC_VXLAN_TUNNEL and only the 3-byte VNI is matched.
 * NOTE(review): the declaration of the id helper and the conditions guarding
 * the default mask and the spec copy are elided from this listing.
 */
876 flow_verbs_translate_item_vxlan_gpe(struct mlx5_flow *dev_flow,
877 const struct rte_flow_item *item,
878 uint64_t item_flags __rte_unused)
880 const struct rte_flow_item_vxlan_gpe *spec = item->spec;
881 const struct rte_flow_item_vxlan_gpe *mask = item->mask;
882 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
883 struct ibv_flow_spec_tunnel vxlan_gpe = {
884 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
890 } id = { .vlan_id = 0, };
893 mask = &rte_flow_item_vxlan_gpe_mask;
/* VNI bytes land in id.vni[1..3]; id.vni[0] keeps its zero initial value. */
895 memcpy(&id.vni[1], spec->vni, 3);
896 vxlan_gpe.val.tunnel_id = id.vlan_id;
897 memcpy(&id.vni[1], mask->vni, 3);
898 vxlan_gpe.mask.tunnel_id = id.vlan_id;
899 /* Remove unwanted bits from values. */
900 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
902 flow_verbs_spec_add(&dev_flow->verbs, &vxlan_gpe, size);
906 * Update the protocol in Verbs IPv4/IPv6 spec.
908 * @param[in, out] attr
909 * Pointer to Verbs attributes structure.
911 * Specification type to search in order to update the IP protocol.
912 * @param[in] protocol
913 * Protocol value to set if none is present in the specification.
/*
 * Walks the specs stored directly after the ibv_flow_attr header looking for
 * one of type search. For an IPv4 or IPv6 spec whose protocol value is zero,
 * the protocol is set to the given value with a full 0xff mask; a non-zero
 * protocol already present is left untouched.
 * NOTE(review): the switch header and the break or return statements are
 * elided from this listing.
 */
916 flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
917 enum ibv_flow_spec_type search,
/* First spec header sits immediately behind the attribute structure. */
921 struct ibv_spec_header *hdr = (struct ibv_spec_header *)
922 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
926 for (i = 0; i != attr->num_of_specs; ++i) {
927 if (hdr->type == search) {
929 struct ibv_flow_spec_ipv4_ext *ipv4;
930 struct ibv_flow_spec_ipv6 *ipv6;
934 case IBV_FLOW_SPEC_IPV4_EXT:
935 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
936 if (!ip.ipv4->val.proto) {
937 ip.ipv4->val.proto = protocol;
938 ip.ipv4->mask.proto = 0xff;
941 case IBV_FLOW_SPEC_IPV6:
942 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
943 if (!ip.ipv6->val.next_hdr) {
944 ip.ipv6->val.next_hdr = protocol;
945 ip.ipv6->mask.next_hdr = 0xff;
/* Specs are variable sized: advance by the size recorded in the header. */
953 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
958 * Convert the @p item into a Verbs specification. This function assumes that
959 * the input is valid and that there is space to insert the requested item
962 * @param[in, out] dev_flow
963 * Pointer to dev_flow structure.
965 * Item specification.
966 * @param[in] item_flags
/*
 * Appends a tunnel spec for a GRE item. Without
 * HAVE_IBV_DEVICE_MPLS_SUPPORT a generic ibv_flow_spec_tunnel of type
 * IBV_FLOW_SPEC_VXLAN_TUNNEL is used; otherwise an ibv_flow_spec_gre is
 * filled from the item flags/version word and protocol. Before the spec is
 * added, the protocol of the already translated outer IPv4 or IPv6 spec is
 * updated through flow_verbs_item_gre_ip_protocol_update().
 * NOTE(review): item is marked __rte_unused but is read in the MPLS-capable
 * branch. No assignment to tunnel.val.key or tunnel.mask.key is visible
 * before they are ANDed. The protocol argument passed to the update helper
 * is elided from this listing.
 */
970 flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow,
971 const struct rte_flow_item *item __rte_unused,
974 struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs;
975 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
976 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
977 struct ibv_flow_spec_tunnel tunnel = {
978 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
982 const struct rte_flow_item_gre *spec = item->spec;
983 const struct rte_flow_item_gre *mask = item->mask;
984 unsigned int size = sizeof(struct ibv_flow_spec_gre);
985 struct ibv_flow_spec_gre tunnel = {
986 .type = IBV_FLOW_SPEC_GRE,
991 mask = &rte_flow_item_gre_mask;
993 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
994 tunnel.val.protocol = spec->protocol;
995 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
996 tunnel.mask.protocol = mask->protocol;
997 /* Remove unwanted bits from values. */
998 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
999 tunnel.val.protocol &= tunnel.mask.protocol;
1000 tunnel.val.key &= tunnel.mask.key;
/* Pick the outer L3 spec type to patch from the parsed item flags. */
1003 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1004 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
1005 IBV_FLOW_SPEC_IPV4_EXT,
1008 flow_verbs_item_gre_ip_protocol_update(&verbs->attr,
1011 flow_verbs_spec_add(verbs, &tunnel, size);
1015 * Convert the @p action into a Verbs specification. This function assumes that
1016 * the input is valid and that there is space to insert the requested action
1017 * into the flow. This function also return the action that was added.
1019 * @param[in, out] dev_flow
1020 * Pointer to dev_flow structure.
1022 * Item specification.
1023 * @param[in] item_flags
1024 * Parsed item flags.
/*
 * Translates an MPLS item (not an action, despite the wording above) into an
 * ibv_flow_spec_mpls and appends it to dev_flow->verbs. The whole body is
 * compiled only with HAVE_IBV_DEVICE_MPLS_SUPPORT, which is why every
 * parameter carries __rte_unused.
 * NOTE(review): nothing visible returns the added spec, so the last sentence
 * of the description above looks stale. The conditions guarding the default
 * mask and the spec copy are elided from this listing.
 */
1027 flow_verbs_translate_item_mpls(struct mlx5_flow *dev_flow __rte_unused,
1028 const struct rte_flow_item *item __rte_unused,
1029 uint64_t item_flags __rte_unused)
1031 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1032 const struct rte_flow_item_mpls *spec = item->spec;
1033 const struct rte_flow_item_mpls *mask = item->mask;
1034 unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1035 struct ibv_flow_spec_mpls mpls = {
1036 .type = IBV_FLOW_SPEC_MPLS,
1041 mask = &rte_flow_item_mpls_mask;
/*
 * The label is taken from the first sizeof(mpls.val.label) bytes of the item
 * structure itself rather than from a named member.
 */
1043 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1044 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1045 /* Remove unwanted bits from values. */
1046 mpls.val.label &= mpls.mask.label;
1048 flow_verbs_spec_add(&dev_flow->verbs, &mpls, size);
1053 * Convert the @p action into a Verbs specification. This function assumes that
1054 * the input is valid and that there is space to insert the requested action
1057 * @param[in] dev_flow
1058 * Pointer to mlx5_flow.
1060 * Action configuration.
/*
 * Appends an ibv_flow_spec_action_drop to dev_flow->verbs. The action
 * argument carries no configuration and is not read.
 */
1063 flow_verbs_translate_action_drop
1064 (struct mlx5_flow *dev_flow,
1065 const struct rte_flow_action *action __rte_unused)
1067 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1068 struct ibv_flow_spec_action_drop drop = {
1069 .type = IBV_FLOW_SPEC_ACTION_DROP,
1073 flow_verbs_spec_add(&dev_flow->verbs, &drop, size);
1077 * Convert the @p action into a Verbs specification. This function assumes that
1078 * the input is valid and that there is space to insert the requested action
1081 * @param[in] rss_desc
1082 * Pointer to mlx5_flow_rss_desc.
1084 * Action configuration.
/*
 * Records a QUEUE action in the RSS descriptor as a single-entry queue list;
 * no Verbs spec is added by the lines shown.
 */
1087 flow_verbs_translate_action_queue(struct mlx5_flow_rss_desc *rss_desc,
1088 const struct rte_flow_action *action)
1090 const struct rte_flow_action_queue *queue = action->conf;
1092 rss_desc->queue[0] = queue->index;
1093 rss_desc->queue_num = 1;
1097 * Convert the @p action into a Verbs specification. This function assumes that
1098 * the input is valid and that there is space to insert the requested action
1101 * @param[in] rss_desc
1102 * Pointer to mlx5_flow_rss_desc.
1104 * Action configuration.
/*
 * Copies the queue list and the hash key of an RSS action into the RSS
 * descriptor; no Verbs spec is added by the lines shown.
 * NOTE(review): no check of rss->queue_num against the capacity of
 * rss_desc->queue is visible here; presumably validated by the caller -
 * TODO confirm.
 */
1107 flow_verbs_translate_action_rss(struct mlx5_flow_rss_desc *rss_desc,
1108 const struct rte_flow_action *action)
1110 const struct rte_flow_action_rss *rss = action->conf;
1111 const uint8_t *rss_key;
1113 memcpy(rss_desc->queue, rss->queue, rss->queue_num * sizeof(uint16_t));
1114 rss_desc->queue_num = rss->queue_num;
1115 /* NULL RSS key indicates default RSS key. */
1116 rss_key = !rss->key ? rss_hash_default_key : rss->key;
/* Always copies MLX5_RSS_HASH_KEY_LEN bytes, whatever rss->key_len says. */
1117 memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
1119 * rss->level and rss.types should be set in advance when expanding
1125 * Convert the @p action into a Verbs specification. This function assumes that
1126 * the input is valid and that there is space to insert the requested action
1129 * @param[in] dev_flow
1130 * Pointer to mlx5_flow.
1132 * Action configuration.
/*
 * Appends an ibv_flow_spec_action_tag whose tag id is the encoded default
 * mark (MLX5_FLOW_MARK_DEFAULT). The action argument is not read.
 */
1135 flow_verbs_translate_action_flag
1136 (struct mlx5_flow *dev_flow,
1137 const struct rte_flow_action *action __rte_unused)
1139 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1140 struct ibv_flow_spec_action_tag tag = {
1141 .type = IBV_FLOW_SPEC_ACTION_TAG,
1143 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1146 flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1150 * Convert the @p action into a Verbs specification. This function assumes that
1151 * the input is valid and that there is space to insert the requested action
1154 * @param[in] dev_flow
1155 * Pointer to mlx5_flow.
1157 * Action configuration.
/*
 * Appends an ibv_flow_spec_action_tag whose tag id is the mark id from the
 * action configuration, encoded by mlx5_flow_mark_set().
 */
1160 flow_verbs_translate_action_mark(struct mlx5_flow *dev_flow,
1161 const struct rte_flow_action *action)
1163 const struct rte_flow_action_mark *mark = action->conf;
1164 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1165 struct ibv_flow_spec_action_tag tag = {
1166 .type = IBV_FLOW_SPEC_ACTION_TAG,
1168 .tag_id = mlx5_flow_mark_set(mark->id),
1171 flow_verbs_spec_add(&dev_flow->verbs, &tag, size);
1175 * Convert the @p action into a Verbs specification. This function assumes that
1176 * the input is valid and that there is space to insert the requested action
1180 * Pointer to the Ethernet device structure.
1182 * Action configuration.
1183 * @param[in] dev_flow
1184 * Pointer to mlx5_flow.
1186 * Pointer to error structure.
1189 * 0 On success else a negative errno value is returned and rte_errno is set.
/*
 * Makes sure the parent flow owns a counter (allocating one on first use) and
 * appends a counter action spec that references the Verbs counter object.
 * With the V42 API the spec carries the counter set handle, with the V45 API
 * it carries the counters object pointer.
 * NOTE(review): the parameter order in the description above (dev, action,
 * dev_flow, error) does not match the signature (dev_flow, action, dev,
 * error). The failure check after the allocation and the final return are
 * elided from this listing.
 */
1192 flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
1193 const struct rte_flow_action *action,
1194 struct rte_eth_dev *dev,
1195 struct rte_flow_error *error)
1197 const struct rte_flow_action_count *count = action->conf;
1198 struct rte_flow *flow = dev_flow->flow;
1199 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1200 defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1201 struct mlx5_flow_counter_pool *pool;
1202 struct mlx5_flow_counter *cnt = NULL;
1203 struct mlx5_flow_counter_ext *cnt_ext;
1204 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1205 struct ibv_flow_spec_counter_action counter = {
1206 .type = IBV_FLOW_SPEC_ACTION_COUNT,
/* A counter index of zero means the flow has no counter yet. */
1211 if (!flow->counter) {
1212 flow->counter = flow_verbs_counter_new(dev, count->shared,
1215 return rte_flow_error_set(error, rte_errno,
1216 RTE_FLOW_ERROR_TYPE_ACTION,
1218 "cannot get counter"
1221 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
1222 cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1223 cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1224 counter.counter_set_handle = cnt_ext->cs->handle;
1225 flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1226 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1227 cnt = flow_verbs_counter_get_by_idx(dev, flow->counter, &pool);
1228 cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
1229 counter.counters = cnt_ext->cs;
1230 flow_verbs_spec_add(&dev_flow->verbs, &counter, size);
1236 * Internal validation function. For validating both actions and items.
1239 * Pointer to the Ethernet device structure.
1241 * Pointer to the flow attributes.
1243 * Pointer to the list of items.
1244 * @param[in] actions
1245 * Pointer to the list of actions.
1246 * @param[in] external
1247 * This flow rule is created by request external to PMD.
1248 * @param[in] hairpin
1249 * Number of hairpin TX actions, 0 means classic flow.
1251 * Pointer to the error structure.
1254 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validation entry point of the Verbs flow engine: the attributes are
 * checked first, then every pattern item, then every action, and finally
 * the combination of actions. "external" and "hairpin" are accepted but
 * not used by this function.
 */
1257 flow_verbs_validate(struct rte_eth_dev *dev,
1258 const struct rte_flow_attr *attr,
1259 const struct rte_flow_item items[],
1260 const struct rte_flow_action actions[],
1261 bool external __rte_unused,
1262 int hairpin __rte_unused,
1263 struct rte_flow_error *error)
/* Accumulated MLX5_FLOW_ACTION_* and MLX5_FLOW_LAYER_* bits. */
1266 uint64_t action_flags = 0;
1267 uint64_t item_flags = 0;
/* Layer bit(s) of the item currently being validated. */
1268 uint64_t last_item = 0;
/* 0xff is the initial value; the IPv4/IPv6 cases below reset to it. */
1269 uint8_t next_protocol = 0xff;
1270 uint16_t ether_type = 0;
1274 ret = mlx5_flow_validate_attributes(dev, attr, error);
/* Walk the pattern until the END item. */
1277 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
/* Non-zero once a tunnel layer bit is present in item_flags. */
1278 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1281 switch (items->type) {
1282 case RTE_FLOW_ITEM_TYPE_VOID:
1284 case RTE_FLOW_ITEM_TYPE_ETH:
1285 ret = mlx5_flow_validate_item_eth(items, item_flags,
1289 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1290 MLX5_FLOW_LAYER_OUTER_L2;
/* ether_type is refreshed only when both spec and mask are given. */
1291 if (items->mask != NULL && items->spec != NULL) {
1293 ((const struct rte_flow_item_eth *)
1296 ((const struct rte_flow_item_eth *)
/* Convert from big-endian to CPU byte order. */
1298 ether_type = rte_be_to_cpu_16(ether_type);
1303 case RTE_FLOW_ITEM_TYPE_VLAN:
1304 ret = mlx5_flow_validate_item_vlan(items, item_flags,
/* A VLAN item sets both the L2 and the VLAN layer bits. */
1308 last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1309 MLX5_FLOW_LAYER_INNER_VLAN) :
1310 (MLX5_FLOW_LAYER_OUTER_L2 |
1311 MLX5_FLOW_LAYER_OUTER_VLAN);
/* Same handling as ETH, using the VLAN item's inner_type field. */
1312 if (items->mask != NULL && items->spec != NULL) {
1314 ((const struct rte_flow_item_vlan *)
1315 items->spec)->inner_type;
1317 ((const struct rte_flow_item_vlan *)
1318 items->mask)->inner_type;
1319 ether_type = rte_be_to_cpu_16(ether_type);
1324 case RTE_FLOW_ITEM_TYPE_IPV4:
/* The ether_type recorded by the ETH/VLAN cases is passed along. */
1325 ret = mlx5_flow_validate_item_ipv4
1327 last_item, ether_type, NULL,
1328 MLX5_ITEM_RANGE_NOT_ACCEPTED,
1332 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1333 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
/*
 * NOTE(review): only items->mask is NULL-checked before items->spec is
 * dereferenced below, unlike the ETH/VLAN cases which check both.
 * Presumably the validate helper above rejects a mask without a spec;
 * confirm.
 */
1334 if (items->mask != NULL &&
1335 ((const struct rte_flow_item_ipv4 *)
1336 items->mask)->hdr.next_proto_id) {
1338 ((const struct rte_flow_item_ipv4 *)
1339 (items->spec))->hdr.next_proto_id;
1341 ((const struct rte_flow_item_ipv4 *)
1342 (items->mask))->hdr.next_proto_id;
1344 /* Reset for inner layer. */
1345 next_protocol = 0xff;
1348 case RTE_FLOW_ITEM_TYPE_IPV6:
1349 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1355 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1356 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
/*
 * NOTE(review): same pattern as IPv4, items->spec is read after checking
 * only items->mask; confirm the helper guarantees a non-NULL spec.
 */
1357 if (items->mask != NULL &&
1358 ((const struct rte_flow_item_ipv6 *)
1359 items->mask)->hdr.proto) {
1361 ((const struct rte_flow_item_ipv6 *)
1362 items->spec)->hdr.proto;
1364 ((const struct rte_flow_item_ipv6 *)
1365 items->mask)->hdr.proto;
1367 /* Reset for inner layer. */
1368 next_protocol = 0xff;
1371 case RTE_FLOW_ITEM_TYPE_UDP:
1372 ret = mlx5_flow_validate_item_udp(items, item_flags,
1377 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1378 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1380 case RTE_FLOW_ITEM_TYPE_TCP:
1381 ret = mlx5_flow_validate_item_tcp
1384 &rte_flow_item_tcp_mask,
1388 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1389 MLX5_FLOW_LAYER_OUTER_L4_TCP;
/* Tunnel items set a single layer bit regardless of "tunnel". */
1391 case RTE_FLOW_ITEM_TYPE_VXLAN:
1392 ret = mlx5_flow_validate_item_vxlan(items, item_flags,
1396 last_item = MLX5_FLOW_LAYER_VXLAN;
1398 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1399 ret = mlx5_flow_validate_item_vxlan_gpe(items,
1404 last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
1406 case RTE_FLOW_ITEM_TYPE_GRE:
/* next_protocol as recorded by the IPv4/IPv6 cases is handed over. */
1407 ret = mlx5_flow_validate_item_gre(items, item_flags,
1408 next_protocol, error);
1411 last_item = MLX5_FLOW_LAYER_GRE;
1413 case RTE_FLOW_ITEM_TYPE_MPLS:
1414 ret = mlx5_flow_validate_item_mpls(dev, items,
1419 last_item = MLX5_FLOW_LAYER_MPLS;
/* Unsupported item type. */
1422 return rte_flow_error_set(error, ENOTSUP,
1423 RTE_FLOW_ERROR_TYPE_ITEM,
1424 NULL, "item not supported");
/* Merge the layer of the item just handled into the accumulated flags. */
1426 item_flags |= last_item;
/* Walk the actions until the END action, recording each one seen. */
1428 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1429 switch (actions->type) {
1430 case RTE_FLOW_ACTION_TYPE_VOID:
1432 case RTE_FLOW_ACTION_TYPE_FLAG:
1433 ret = mlx5_flow_validate_action_flag(action_flags,
1438 action_flags |= MLX5_FLOW_ACTION_FLAG;
1440 case RTE_FLOW_ACTION_TYPE_MARK:
1441 ret = mlx5_flow_validate_action_mark(actions,
1447 action_flags |= MLX5_FLOW_ACTION_MARK;
1449 case RTE_FLOW_ACTION_TYPE_DROP:
1450 ret = mlx5_flow_validate_action_drop(action_flags,
1455 action_flags |= MLX5_FLOW_ACTION_DROP;
1457 case RTE_FLOW_ACTION_TYPE_QUEUE:
1458 ret = mlx5_flow_validate_action_queue(actions,
1464 action_flags |= MLX5_FLOW_ACTION_QUEUE;
1466 case RTE_FLOW_ACTION_TYPE_RSS:
1467 ret = mlx5_flow_validate_action_rss(actions,
1473 action_flags |= MLX5_FLOW_ACTION_RSS;
1475 case RTE_FLOW_ACTION_TYPE_COUNT:
1476 ret = mlx5_flow_validate_action_count(dev, attr, error);
1479 action_flags |= MLX5_FLOW_ACTION_COUNT;
/* Unsupported action type. */
1482 return rte_flow_error_set(error, ENOTSUP,
1483 RTE_FLOW_ERROR_TYPE_ACTION,
1485 "action not supported");
1489 * Validate the drop action mutual exclusion with other actions.
1490 * Drop action is mutually-exclusive with any other action, except for
1493 if ((action_flags & MLX5_FLOW_ACTION_DROP) &&
1494 (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT)))
1495 return rte_flow_error_set(error, EINVAL,
1496 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1497 "Drop action is mutually-exclusive "
1498 "with any other action, except for "
/* At least one of the MLX5_FLOW_FATE_ACTIONS bits must have been set. */
1500 if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
1501 return rte_flow_error_set(error, EINVAL,
1502 RTE_FLOW_ERROR_TYPE_ACTION, actions,
1503 "no fate action is found");
1508 * Calculate the required bytes that are needed for the action part of the verbs
1511 * @param[in] actions
1512 * Pointer to the list of actions.
1515 * The size of the memory needed for all actions.
/*
 * Add up the Verbs spec sizes needed by the action list, walking it until
 * the END action.
 */
1518 flow_verbs_get_actions_size(const struct rte_flow_action actions[])
1522 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1523 switch (actions->type) {
1524 case RTE_FLOW_ACTION_TYPE_VOID:
/* FLAG and MARK are each sized as one tag action spec. */
1526 case RTE_FLOW_ACTION_TYPE_FLAG:
1527 size += sizeof(struct ibv_flow_spec_action_tag);
1529 case RTE_FLOW_ACTION_TYPE_MARK:
1530 size += sizeof(struct ibv_flow_spec_action_tag);
1532 case RTE_FLOW_ACTION_TYPE_DROP:
1533 size += sizeof(struct ibv_flow_spec_action_drop);
/* No size is added for QUEUE or RSS in the visible code. */
1535 case RTE_FLOW_ACTION_TYPE_QUEUE:
1537 case RTE_FLOW_ACTION_TYPE_RSS:
/* COUNT is sized only when a counters-set API (V42 or V45) is defined. */
1539 case RTE_FLOW_ACTION_TYPE_COUNT:
1540 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
1541 defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
1542 size += sizeof(struct ibv_flow_spec_counter_action);
1553 * Calculate the required bytes that are needed for the item part of the verbs
1557 * Pointer to the list of items.
1560 * The size of the memory needed for all items.
/*
 * Add up the Verbs spec sizes needed by the pattern, walking it until the
 * END item.
 */
1563 flow_verbs_get_items_size(const struct rte_flow_item items[])
1567 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1568 switch (items->type) {
1569 case RTE_FLOW_ITEM_TYPE_VOID:
/* ETH and VLAN are both sized as an Ethernet spec. */
1571 case RTE_FLOW_ITEM_TYPE_ETH:
1572 size += sizeof(struct ibv_flow_spec_eth);
1574 case RTE_FLOW_ITEM_TYPE_VLAN:
1575 size += sizeof(struct ibv_flow_spec_eth);
1577 case RTE_FLOW_ITEM_TYPE_IPV4:
1578 size += sizeof(struct ibv_flow_spec_ipv4_ext);
1580 case RTE_FLOW_ITEM_TYPE_IPV6:
1581 size += sizeof(struct ibv_flow_spec_ipv6);
/* UDP and TCP share the same spec structure. */
1583 case RTE_FLOW_ITEM_TYPE_UDP:
1584 size += sizeof(struct ibv_flow_spec_tcp_udp);
1586 case RTE_FLOW_ITEM_TYPE_TCP:
1587 size += sizeof(struct ibv_flow_spec_tcp_udp);
/* VXLAN and VXLAN-GPE are both sized as a tunnel spec. */
1589 case RTE_FLOW_ITEM_TYPE_VXLAN:
1590 size += sizeof(struct ibv_flow_spec_tunnel);
1592 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1593 size += sizeof(struct ibv_flow_spec_tunnel);
/*
 * With HAVE_IBV_DEVICE_MPLS_SUPPORT, GRE and MPLS use their dedicated
 * specs. The second GRE case below, sized as a tunnel spec, is presumably
 * the alternative branch of this conditional; confirm.
 */
1595 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1596 case RTE_FLOW_ITEM_TYPE_GRE:
1597 size += sizeof(struct ibv_flow_spec_gre);
1599 case RTE_FLOW_ITEM_TYPE_MPLS:
1600 size += sizeof(struct ibv_flow_spec_mpls);
1603 case RTE_FLOW_ITEM_TYPE_GRE:
1604 size += sizeof(struct ibv_flow_spec_tunnel);
1615 * Internal preparation function. Allocate mlx5_flow with the required size.
1616 * The required size is calculated based on the actions and items. This function
1617 * also returns the detected actions and items for later use.
1620 * Pointer to Ethernet device.
1622 * Pointer to the flow attributes.
1624 * Pointer to the list of items.
1625 * @param[in] actions
1626 * Pointer to the list of actions.
1628 * Pointer to the error structure.
1631 * Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
/*
 * Prepare step of the Verbs flow engine: checks that the specs needed by
 * the actions and items fit, allocates a flow handle from the ipool and
 * initializes the next entry of priv->inter_flows for it.
 */
1634 static struct mlx5_flow *
1635 flow_verbs_prepare(struct rte_eth_dev *dev,
/*
 * NOTE(review): attr is tagged __rte_unused although attr->ingress is read
 * further down; the annotation looks stale.
 */
1636 const struct rte_flow_attr *attr __rte_unused,
1637 const struct rte_flow_item items[],
1638 const struct rte_flow_action actions[],
1639 struct rte_flow_error *error)
1642 uint32_t handle_idx = 0;
1643 struct mlx5_flow *dev_flow;
1644 struct mlx5_flow_handle *dev_handle;
1645 struct mlx5_priv *priv = dev->data->dev_private;
/* Total spec size: action part plus item part. */
1647 size += flow_verbs_get_actions_size(actions);
1648 size += flow_verbs_get_items_size(items);
/* Reject rules whose specs exceed MLX5_VERBS_MAX_SPEC_ACT_SIZE. */
1649 if (size > MLX5_VERBS_MAX_SPEC_ACT_SIZE) {
1650 rte_flow_error_set(error, E2BIG,
1651 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1652 "Verbs spec/action size too large");
1655 /* In case of corrupting the memory. */
/* flow_idx is used below as the index into priv->inter_flows. */
1656 if (priv->flow_idx >= MLX5_NUM_MAX_DEV_FLOWS) {
1657 rte_flow_error_set(error, ENOSPC,
1658 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1659 "not free temporary device flow");
/* Zero-initialized handle taken from the MLX5_IPOOL_MLX5_FLOW pool. */
1662 dev_handle = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1665 rte_flow_error_set(error, ENOMEM,
1666 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1667 "not enough memory to create flow handle");
1670 /* No multi-thread supporting. */
/* Take the next temporary device flow slot and advance flow_idx. */
1671 dev_flow = &((struct mlx5_flow *)priv->inter_flows)[priv->flow_idx++];
1672 dev_flow->handle = dev_handle;
1673 dev_flow->handle_idx = handle_idx;
1674 /* Memcpy is used, only size needs to be cleared to 0. */
1675 dev_flow->verbs.size = 0;
1676 dev_flow->verbs.attr.num_of_specs = 0;
1677 dev_flow->ingress = attr->ingress;
1678 dev_flow->hash_fields = 0;
1679 /* Need to set transfer attribute: not supported in Verbs mode. */
1684 * Fill the flow with verb spec.
1687 * Pointer to Ethernet device.
1688 * @param[in, out] dev_flow
1689 * Pointer to the mlx5 flow.
1691 * Pointer to the flow attributes.
1693 * Pointer to the list of items.
1694 * @param[in] actions
1695 * Pointer to the list of actions.
1697 * Pointer to the error structure.
1700 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Translate step of the Verbs flow engine: converts the actions first and
 * the pattern items second into dev_flow, recording the action and layer
 * flags seen, the RSS hash fields and the final Verbs priority.
 */
1703 flow_verbs_translate(struct rte_eth_dev *dev,
1704 struct mlx5_flow *dev_flow,
1705 const struct rte_flow_attr *attr,
1706 const struct rte_flow_item items[],
1707 const struct rte_flow_action actions[],
1708 struct rte_flow_error *error)
1710 uint64_t item_flags = 0;
1711 uint64_t action_flags = 0;
1712 uint64_t priority = attr->priority;
/* Overwritten by each item below, so the last item decides its value. */
1713 uint32_t subpriority = 0;
1714 struct mlx5_priv *priv = dev->data->dev_private;
/* Entry 0 or 1 is selected depending on whether flow_nested_idx is set. */
1715 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
1717 [!!priv->flow_nested_idx];
/* The reserved priority value maps to the last configured flow priority. */
1719 if (priority == MLX5_FLOW_PRIO_RSVD)
1720 priority = priv->config.flow_prio - 1;
1721 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1724 switch (actions->type) {
1725 case RTE_FLOW_ACTION_TYPE_VOID:
/* FLAG and MARK both set the handle's mark indicator. */
1727 case RTE_FLOW_ACTION_TYPE_FLAG:
1728 flow_verbs_translate_action_flag(dev_flow, actions);
1729 action_flags |= MLX5_FLOW_ACTION_FLAG;
1730 dev_flow->handle->mark = 1;
1732 case RTE_FLOW_ACTION_TYPE_MARK:
1733 flow_verbs_translate_action_mark(dev_flow, actions);
1734 action_flags |= MLX5_FLOW_ACTION_MARK;
1735 dev_flow->handle->mark = 1;
1737 case RTE_FLOW_ACTION_TYPE_DROP:
1738 flow_verbs_translate_action_drop(dev_flow, actions);
1739 action_flags |= MLX5_FLOW_ACTION_DROP;
1740 dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP;
/* QUEUE and RSS fill rss_desc and share the QUEUE fate action. */
1742 case RTE_FLOW_ACTION_TYPE_QUEUE:
1743 flow_verbs_translate_action_queue(rss_desc, actions);
1744 action_flags |= MLX5_FLOW_ACTION_QUEUE;
1745 dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1747 case RTE_FLOW_ACTION_TYPE_RSS:
1748 flow_verbs_translate_action_rss(rss_desc, actions);
1749 action_flags |= MLX5_FLOW_ACTION_RSS;
1750 dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE;
1752 case RTE_FLOW_ACTION_TYPE_COUNT:
1753 ret = flow_verbs_translate_action_count(dev_flow,
1758 action_flags |= MLX5_FLOW_ACTION_COUNT;
/* Unsupported action type. */
1761 return rte_flow_error_set(error, ENOTSUP,
1762 RTE_FLOW_ERROR_TYPE_ACTION,
1764 "action not supported");
/* Action flags are stored before the items are translated. */
1767 dev_flow->act_flags = action_flags;
1768 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
/* Non-zero once a tunnel layer bit is present in item_flags. */
1769 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1771 switch (items->type) {
1772 case RTE_FLOW_ITEM_TYPE_VOID:
1774 case RTE_FLOW_ITEM_TYPE_ETH:
1775 flow_verbs_translate_item_eth(dev_flow, items,
1777 subpriority = MLX5_PRIORITY_MAP_L2;
1778 item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1779 MLX5_FLOW_LAYER_OUTER_L2;
1781 case RTE_FLOW_ITEM_TYPE_VLAN:
1782 flow_verbs_translate_item_vlan(dev_flow, items,
1784 subpriority = MLX5_PRIORITY_MAP_L2;
1785 item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
1786 MLX5_FLOW_LAYER_INNER_VLAN) :
1787 (MLX5_FLOW_LAYER_OUTER_L2 |
1788 MLX5_FLOW_LAYER_OUTER_VLAN);
/* L3/L4 items also accumulate hash fields into dev_flow->hash_fields. */
1790 case RTE_FLOW_ITEM_TYPE_IPV4:
1791 flow_verbs_translate_item_ipv4(dev_flow, items,
1793 subpriority = MLX5_PRIORITY_MAP_L3;
1794 dev_flow->hash_fields |=
1795 mlx5_flow_hashfields_adjust
1797 MLX5_IPV4_LAYER_TYPES,
1798 MLX5_IPV4_IBV_RX_HASH);
1799 item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1800 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1802 case RTE_FLOW_ITEM_TYPE_IPV6:
1803 flow_verbs_translate_item_ipv6(dev_flow, items,
1805 subpriority = MLX5_PRIORITY_MAP_L3;
1806 dev_flow->hash_fields |=
1807 mlx5_flow_hashfields_adjust
1809 MLX5_IPV6_LAYER_TYPES,
1810 MLX5_IPV6_IBV_RX_HASH);
1811 item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1812 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1814 case RTE_FLOW_ITEM_TYPE_TCP:
1815 flow_verbs_translate_item_tcp(dev_flow, items,
1817 subpriority = MLX5_PRIORITY_MAP_L4;
1818 dev_flow->hash_fields |=
1819 mlx5_flow_hashfields_adjust
1820 (rss_desc, tunnel, ETH_RSS_TCP,
1821 (IBV_RX_HASH_SRC_PORT_TCP |
1822 IBV_RX_HASH_DST_PORT_TCP));
1823 item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1824 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1826 case RTE_FLOW_ITEM_TYPE_UDP:
1827 flow_verbs_translate_item_udp(dev_flow, items,
1829 subpriority = MLX5_PRIORITY_MAP_L4;
1830 dev_flow->hash_fields |=
1831 mlx5_flow_hashfields_adjust
1832 (rss_desc, tunnel, ETH_RSS_UDP,
1833 (IBV_RX_HASH_SRC_PORT_UDP |
1834 IBV_RX_HASH_DST_PORT_UDP));
1835 item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1836 MLX5_FLOW_LAYER_OUTER_L4_UDP;
/* Tunnel items take their subpriority from MLX5_TUNNEL_PRIO_GET(). */
1838 case RTE_FLOW_ITEM_TYPE_VXLAN:
1839 flow_verbs_translate_item_vxlan(dev_flow, items,
1841 subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1842 item_flags |= MLX5_FLOW_LAYER_VXLAN;
1844 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1845 flow_verbs_translate_item_vxlan_gpe(dev_flow, items,
1847 subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1848 item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
1850 case RTE_FLOW_ITEM_TYPE_GRE:
1851 flow_verbs_translate_item_gre(dev_flow, items,
1853 subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1854 item_flags |= MLX5_FLOW_LAYER_GRE;
1856 case RTE_FLOW_ITEM_TYPE_MPLS:
1857 flow_verbs_translate_item_mpls(dev_flow, items,
1859 subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc);
1860 item_flags |= MLX5_FLOW_LAYER_MPLS;
/* Unsupported item type. */
1863 return rte_flow_error_set(error, ENOTSUP,
1864 RTE_FLOW_ERROR_TYPE_ITEM,
1866 "item not supported");
/* Record the layers matched by this rule on the handle. */
1869 dev_flow->handle->layers = item_flags;
1870 /* Other members of attr will be ignored. */
/* Final Verbs priority is derived from the rule priority and subpriority. */
1871 dev_flow->verbs.attr.priority =
1872 mlx5_flow_adjust_priority(dev, priority, subpriority);
1873 dev_flow->verbs.attr.port = (uint8_t)priv->dev_port;
1878 * Remove the flow from the NIC but keep it in memory.
1881 * Pointer to the Ethernet device structure.
1882 * @param[in, out] flow
1883 * Pointer to flow structure.
/*
 * For every handle linked from flow->dev_handles: destroy the Verbs flow
 * if one exists, release the drop action or hash Rx queue it refers to,
 * and release the VF VLAN entry when one was created.
 */
1886 flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1888 struct mlx5_priv *priv = dev->data->dev_private;
1889 struct mlx5_flow_handle *handle;
1890 uint32_t handle_idx;
1894 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1895 handle_idx, handle, next) {
/* drv_flow is cleared after destruction so it is not destroyed twice. */
1896 if (handle->drv_flow) {
1897 claim_zero(mlx5_glue->destroy_flow(handle->drv_flow));
1898 handle->drv_flow = NULL;
1900 /* hrxq is union, don't touch it only the flag is set. */
/* The release call depends on the fate action stored on the handle. */
1901 if (handle->rix_hrxq) {
1902 if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1903 mlx5_drop_action_destroy(dev);
1904 handle->rix_hrxq = 0;
1905 } else if (handle->fate_action ==
1906 MLX5_FLOW_FATE_QUEUE) {
1907 mlx5_hrxq_release(dev, handle->rix_hrxq);
1908 handle->rix_hrxq = 0;
1911 if (handle->vf_vlan.tag && handle->vf_vlan.created)
1912 mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
1917 * Remove the flow from the NIC and the memory.
1920 * Pointer to the Ethernet device structure.
1921 * @param[in, out] flow
1922 * Pointer to flow structure.
/*
 * Remove the flow through flow_verbs_remove(), then return every handle of
 * flow->dev_handles to the ipool and release the counter if one is set.
 */
1925 flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1927 struct mlx5_priv *priv = dev->data->dev_private;
1928 struct mlx5_flow_handle *handle;
1932 flow_verbs_remove(dev, flow);
/* Pop handles from the head of the list until it is empty. */
1933 while (flow->dev_handles) {
1934 uint32_t tmp_idx = flow->dev_handles;
1936 handle = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
/* Advance the list head before the current entry is freed. */
1940 flow->dev_handles = handle->next.next;
1941 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
1944 if (flow->counter) {
1945 flow_verbs_counter_release(dev, flow->counter);
1951 * Apply the flow to the NIC.
1954 * Pointer to the Ethernet device structure.
1955 * @param[in, out] flow
1956 * Pointer to flow structure.
1958 * Pointer to error structure.
1961 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Apply step of the Verbs flow engine: for each temporary device flow,
 * obtain the drop action or a hash Rx queue, then create the Verbs flow on
 * its queue pair. The tail of the function is the error path, which
 * releases the queues and VLAN entries held by the flow's handles.
 */
1964 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1965 struct rte_flow_error *error)
1967 struct mlx5_priv *priv = dev->data->dev_private;
1968 struct mlx5_flow_handle *handle;
1969 struct mlx5_flow *dev_flow;
1970 struct mlx5_hrxq *hrxq;
1971 uint32_t dev_handles;
/* Walk priv->inter_flows from flow_idx - 1 down to flow_nested_idx. */
1975 for (idx = priv->flow_idx - 1; idx >= priv->flow_nested_idx; idx--) {
1976 dev_flow = &((struct mlx5_flow *)priv->inter_flows)[idx];
1977 handle = dev_flow->handle;
1978 if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
1979 hrxq = mlx5_drop_action_create(dev);
1983 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1984 "cannot get drop hash queue");
/* Entry 0 or 1 is selected depending on whether flow_nested_idx is set. */
1989 struct mlx5_flow_rss_desc *rss_desc =
1990 &((struct mlx5_flow_rss_desc *)priv->rss_desc)
1991 [!!priv->flow_nested_idx];
1993 MLX5_ASSERT(rss_desc->queue_num);
/*
 * Look up a hash Rx queue matching the key, hash fields and queues.
 * Presumably mlx5_hrxq_new() below is only reached when the lookup
 * finds nothing; confirm.
 */
1994 hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
1995 MLX5_RSS_HASH_KEY_LEN,
1996 dev_flow->hash_fields,
1998 rss_desc->queue_num);
2000 hrxq_idx = mlx5_hrxq_new
2001 (dev, rss_desc->key,
2002 MLX5_RSS_HASH_KEY_LEN,
2003 dev_flow->hash_fields,
2005 rss_desc->queue_num,
2007 MLX5_FLOW_LAYER_TUNNEL));
/* Resolve the index into the hash Rx queue object. */
2008 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
2013 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2014 "cannot get hash queue");
2017 handle->rix_hrxq = hrxq_idx;
/* Create the Verbs flow on the queue pair of the selected hrxq. */
2020 handle->drv_flow = mlx5_glue->create_flow
2021 (hrxq->qp, &dev_flow->verbs.attr);
2022 if (!handle->drv_flow) {
2023 rte_flow_error_set(error, errno,
2024 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2026 "hardware refuses to create flow");
2029 if (priv->vmwa_context &&
2030 handle->vf_vlan.tag && !handle->vf_vlan.created) {
2032 * The rule contains the VLAN pattern.
2033 * For VF we are going to create VLAN
2034 * interface to make hypervisor set correct
2035 * e-Switch vport context.
2037 mlx5_vlan_vmwa_acquire(dev, &handle->vf_vlan);
/*
 * Error path. NOTE(review): the visible cleanup releases hash Rx queues
 * and VLAN entries but does not destroy drv_flow objects created before
 * the failure; presumably the caller's remove/destroy path does that;
 * confirm.
 */
2042 err = rte_errno; /* Save rte_errno before cleanup. */
2043 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2044 dev_handles, handle, next) {
2045 /* hrxq is union, don't touch it only the flag is set. */
2046 if (handle->rix_hrxq) {
2047 if (handle->fate_action == MLX5_FLOW_FATE_DROP) {
2048 mlx5_drop_action_destroy(dev);
2049 handle->rix_hrxq = 0;
2050 } else if (handle->fate_action ==
2051 MLX5_FLOW_FATE_QUEUE) {
2052 mlx5_hrxq_release(dev, handle->rix_hrxq);
2053 handle->rix_hrxq = 0;
2056 if (handle->vf_vlan.tag && handle->vf_vlan.created)
2057 mlx5_vlan_vmwa_release(dev, &handle->vf_vlan);
2059 rte_errno = err; /* Restore rte_errno. */
2066 * @see rte_flow_query()
/*
 * Query callback of the Verbs flow engine: walks the actions until END and
 * forwards COUNT to flow_verbs_counter_query().
 */
2070 flow_verbs_query(struct rte_eth_dev *dev,
2071 struct rte_flow *flow,
2072 const struct rte_flow_action *actions,
2074 struct rte_flow_error *error)
2078 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2079 switch (actions->type) {
2080 case RTE_FLOW_ACTION_TYPE_VOID:
2082 case RTE_FLOW_ACTION_TYPE_COUNT:
2083 ret = flow_verbs_counter_query(dev, flow, data, error);
/* Unsupported action type. */
2086 return rte_flow_error_set(error, ENOTSUP,
2087 RTE_FLOW_ERROR_TYPE_ACTION,
2089 "action not supported");
/* Driver operations table wiring the flow_verbs_* callbacks defined above. */
2095 const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
2096 .validate = flow_verbs_validate,
2097 .prepare = flow_verbs_prepare,
2098 .translate = flow_verbs_translate,
2099 .apply = flow_verbs_apply,
2100 .remove = flow_verbs_remove,
2101 .destroy = flow_verbs_destroy,
2102 .query = flow_verbs_query,