net/mlx5: support TOS and TTL fields on E-Switch
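The E-Switch (TC flower based) flow engine gains support for matching
on the IP TOS/Traffic Class and TTL/hop limit fields, both in regular
headers and in the outer headers of tunneled traffic, and for setting
these fields in the outer header of the VXLAN encapsulation. Only
exact (0xff) masks are accepted for these fields, partial masks are
rejected at validation time.

The VXLAN VTEP management is reworked as well: VTEP devices are no
longer attached to an outer interface and allocated from a dedicated
UDP port range. Instead, one shared VTEP is created per UDP
destination port taken from the encapsulation action. On kernels
without IFLA_VXLAN_COLLECT_METADATA support the device is created
with an explicitly specified default VNI.

A minimal sketch (illustrative only, not part of the patch) of how an
application could request TOS/TTL matching via rte_flow, using the
full masks this driver path requires:

    #include <rte_flow.h>

    /* Match IPv4 TOS 0x10 and TTL 64 with exact masks. */
    static const struct rte_flow_item_ipv4 ipv4_spec = {
            .hdr = {
                    .type_of_service = 0x10,
                    .time_to_live = 64,
            },
    };
    static const struct rte_flow_item_ipv4 ipv4_mask = {
            .hdr = {
                    .type_of_service = 0xff, /* partial masks are rejected */
                    .time_to_live = 0xff,
            },
    };
    static const struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4,
              .spec = &ipv4_spec, .mask = &ipv4_mask },
            { .type = RTE_FLOW_ITEM_TYPE_END },
    };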
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
index e12ca91..80781bd 100644
@@ -125,6 +125,14 @@ struct tc_pedit_sel {
 #define TCA_TUNNEL_KEY_NO_CSUM 10
 #endif
 
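+/* TOS/TTL tunnel key attributes, values from uapi tc_act/tc_tunnel_key.h. */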
+#ifndef HAVE_TCA_TUNNEL_KEY_ENC_TOS
+#define TCA_TUNNEL_KEY_ENC_TOS 12
+#endif
+
+#ifndef HAVE_TCA_TUNNEL_KEY_ENC_TTL
+#define TCA_TUNNEL_KEY_ENC_TTL 13
+#endif
+
 #else /* HAVE_TC_ACT_TUNNEL_KEY */
 
 #define TCA_ACT_TUNNEL_KEY 17
@@ -138,6 +146,8 @@ struct tc_pedit_sel {
 #define TCA_TUNNEL_KEY_ENC_KEY_ID 7
 #define TCA_TUNNEL_KEY_ENC_DST_PORT 9
 #define TCA_TUNNEL_KEY_NO_CSUM 10
+#define TCA_TUNNEL_KEY_ENC_TOS 12
+#define TCA_TUNNEL_KEY_ENC_TTL 13
 
 struct tc_tunnel_key {
        tc_gen;
@@ -292,6 +302,31 @@ struct tc_tunnel_key {
 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
 #endif
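+/* Flower keys to match IP TOS/TTL, values from uapi linux/pkt_cls.h. */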
+#ifndef HAVE_TCA_FLOWER_KEY_IP_TOS
+#define TCA_FLOWER_KEY_IP_TOS 73
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_TOS_MASK
+#define TCA_FLOWER_KEY_IP_TOS_MASK 74
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_TTL
+#define TCA_FLOWER_KEY_IP_TTL 75
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_TTL_MASK
+#define TCA_FLOWER_KEY_IP_TTL_MASK 76
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TOS
+#define TCA_FLOWER_KEY_ENC_IP_TOS 80
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TOS_MASK
+#define TCA_FLOWER_KEY_ENC_IP_TOS_MASK 81
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TTL
+#define TCA_FLOWER_KEY_ENC_IP_TTL 82
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IP_TTL_MASK
+#define TCA_FLOWER_KEY_ENC_IP_TTL_MASK 83
+#endif
+
 #ifndef HAVE_TC_ACT_GOTO_CHAIN
 #define TC_ACT_GOTO_CHAIN 0x20000000
 #endif
@@ -316,9 +351,8 @@ struct tc_tunnel_key {
 #define TCA_ACT_MAX_PRIO 32
 #endif
 
-/** UDP port range of VXLAN devices created by driver. */
-#define MLX5_VXLAN_PORT_MIN 30000
-#define MLX5_VXLAN_PORT_MAX 60000
+/** Parameters of VXLAN devices created by driver. */
+#define MLX5_VXLAN_DEFAULT_VNI 1
 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
 
 /** Tunnel action type, used for @p type in header structure. */
@@ -337,6 +371,8 @@ enum flow_tcf_tunact_type {
 #define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
 #define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
 #define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
+#define FLOW_TCF_ENCAP_IP_TTL (1u << 9)
+#define FLOW_TCF_ENCAP_IP_TOS (1u << 10)
 
 /**
  * Structure for holding netlink context.
@@ -406,11 +442,8 @@ struct tcf_irule {
 /** VXLAN virtual netdev. */
 struct tcf_vtep {
        LIST_ENTRY(tcf_vtep) next;
-       LIST_HEAD(, tcf_neigh_rule) neigh;
-       LIST_HEAD(, tcf_local_rule) local;
        uint32_t refcnt;
        unsigned int ifindex; /**< Own interface index. */
-       unsigned int ifouter; /**< Index of device attached to. */
        uint16_t port;
        uint8_t created;
 };
@@ -432,6 +465,8 @@ struct flow_tcf_vxlan_encap {
        struct flow_tcf_tunnel_hdr hdr;
        struct tcf_irule *iface;
        uint32_t mask;
+       uint8_t ip_tos; /**< TOS value for the outer IP header. */
+       uint8_t ip_ttl_hop; /**< TTL or hop limit for the outer IP header. */
        struct {
                struct ether_addr dst;
                struct ether_addr src;
@@ -1278,6 +1313,20 @@ flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
                                          " must be specified for"
                                          " vxlan encapsulation");
        }
+       if (mask->hdr.type_of_service &&
+           mask->hdr.type_of_service != 0xff)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                         "no support for partial mask on"
+                                         " \"ipv4.hdr.type_of_service\" field"
+                                         " for vxlan encapsulation");
+       if (mask->hdr.time_to_live &&
+           mask->hdr.time_to_live != 0xff)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                         "no support for partial mask on"
+                                         " \"ipv4.hdr.time_to_live\" field"
+                                         " for vxlan encapsulation");
        return 0;
 }
 
@@ -1299,6 +1348,7 @@ flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
 {
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
+       uint8_t msk6;
 
        if (!spec) {
                /*
@@ -1364,6 +1414,20 @@ flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
                                          " must be specified for"
                                          " vxlan encapsulation");
        }
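+       /* Traffic Class occupies bits 20-27 of the host-order vtc_flow. */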
+       msk6 = (rte_be_to_cpu_32(mask->hdr.vtc_flow) >>
+               IPV6_HDR_TC_SHIFT) & 0xff;
+       if (msk6 && msk6 != 0xff)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                         "no support for partial mask on"
+                                         " \"ipv6.hdr.vtc_flow.tos\" field"
+                                         " for vxlan encapsulation");
+       if (mask->hdr.hop_limits && mask->hdr.hop_limits != 0xff)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                         "no support for partial mask on"
+                                         " \"ipv6.hdr.hop_limits\" field"
+                                         " for vxlan encapsulation");
        return 0;
 }
 
@@ -2451,16 +2515,31 @@ flow_tcf_get_items_size(const struct rte_flow_attr *attr,
                                SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
                                SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
                        break;
-               case RTE_FLOW_ITEM_TYPE_IPV4:
+               case RTE_FLOW_ITEM_TYPE_IPV4: {
+                       const struct rte_flow_item_ipv4 *ipv4 = items->mask;
+
                        size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
                                SZ_NLATTR_TYPE_OF(uint32_t) * 4;
                                /* dst/src IP addr and mask. */
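+                       /* TTL and TOS each need a value and a mask attribute. */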
+                       if (ipv4 && ipv4->hdr.time_to_live)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+                       if (ipv4 && ipv4->hdr.type_of_service)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
                        break;
-               case RTE_FLOW_ITEM_TYPE_IPV6:
+               }
+               case RTE_FLOW_ITEM_TYPE_IPV6: {
+                       const struct rte_flow_item_ipv6 *ipv6 = items->mask;
+
                        size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
                                SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
                                /* dst/src IP addr and mask. */
+                       if (ipv6 && ipv6->hdr.hop_limits)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+                       if (ipv6 && (rte_be_to_cpu_32(ipv6->hdr.vtc_flow) &
+                                    (0xfful << IPV6_HDR_TC_SHIFT)))
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
                        break;
+               }
                case RTE_FLOW_ITEM_TYPE_UDP:
                        size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
                                SZ_NLATTR_TYPE_OF(uint16_t) * 4;
@@ -2528,12 +2607,27 @@ flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
                case RTE_FLOW_ITEM_TYPE_ETH:
                        /* This item does not require message buffer. */
                        break;
-               case RTE_FLOW_ITEM_TYPE_IPV4:
+               case RTE_FLOW_ITEM_TYPE_IPV4: {
+                       const struct rte_flow_item_ipv4 *ipv4 = items->mask;
+
                        size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
+                       if (ipv4 && ipv4->hdr.time_to_live)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+                       if (ipv4 && ipv4->hdr.type_of_service)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
                        break;
-               case RTE_FLOW_ITEM_TYPE_IPV6:
+               }
+               case RTE_FLOW_ITEM_TYPE_IPV6: {
+                       const struct rte_flow_item_ipv6 *ipv6 = items->mask;
+
                        size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
+                       if (ipv6 && ipv6->hdr.hop_limits)
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
+                       if (ipv6 && (rte_be_to_cpu_32(ipv6->hdr.vtc_flow) &
+                                    (0xfful << IPV6_HDR_TC_SHIFT)))
+                               size += SZ_NLATTR_TYPE_OF(uint8_t) * 2;
                        break;
+               }
                case RTE_FLOW_ITEM_TYPE_UDP: {
                        const struct rte_flow_item_udp *udp = items->mask;
 
@@ -2911,11 +3005,14 @@ flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec,
  *
  * @param[in] spec
  *   RTE_FLOW_ITEM_TYPE_IPV4 entry specification.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_IPV4 entry mask.
  * @param[out] encap
  *   Structure to fill the gathered IPV4 address data.
  */
 static void
 flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
+                               const struct rte_flow_item_ipv4 *mask,
                                struct flow_tcf_vxlan_encap *encap)
 {
        /* Item must be validated before. No redundant checks. */
@@ -2924,6 +3021,14 @@ flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
        encap->ipv4.src = spec->hdr.src_addr;
        encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC |
                       FLOW_TCF_ENCAP_IPV4_DST;
+       if (mask && mask->hdr.type_of_service) {
+               encap->mask |= FLOW_TCF_ENCAP_IP_TOS;
+               encap->ip_tos = spec->hdr.type_of_service;
+       }
+       if (mask && mask->hdr.time_to_live) {
+               encap->mask |= FLOW_TCF_ENCAP_IP_TTL;
+               encap->ip_ttl_hop = spec->hdr.time_to_live;
+       }
 }
 
 /**
@@ -2934,11 +3039,14 @@ flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
  *
  * @param[in] spec
  *   RTE_FLOW_ITEM_TYPE_IPV6 entry specification.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_IPV6 entry mask.
  * @param[out] encap
  *   Structure to fill the gathered IPV6 address data.
  */
 static void
 flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
+                               const struct rte_flow_item_ipv6 *mask,
                                struct flow_tcf_vxlan_encap *encap)
 {
        /* Item must be validated before. No redundant checks. */
@@ -2947,6 +3055,19 @@ flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
        memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
        encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC |
                       FLOW_TCF_ENCAP_IPV6_DST;
+       if (mask) {
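+               /* Extract the Traffic Class byte from the vtc_flow word. */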
+               if ((rte_be_to_cpu_32(mask->hdr.vtc_flow) >>
+                   IPV6_HDR_TC_SHIFT) & 0xff) {
+                       encap->mask |= FLOW_TCF_ENCAP_IP_TOS;
+                       encap->ip_tos = (rte_be_to_cpu_32
+                                               (spec->hdr.vtc_flow) >>
+                                                IPV6_HDR_TC_SHIFT) & 0xff;
+               }
+               if (mask->hdr.hop_limits) {
+                       encap->mask |= FLOW_TCF_ENCAP_IP_TTL;
+                       encap->ip_ttl_hop = spec->hdr.hop_limits;
+               }
+       }
 }
 
 /**
@@ -3041,11 +3162,15 @@ flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
                        spec.ipv4 = items->spec;
-                       flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
+                       mask.ipv4 = items->mask;
+                       flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, mask.ipv4,
+                                                       encap);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
                        spec.ipv6 = items->spec;
-                       flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
+                       mask.ipv6 = items->mask;
+                       flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, mask.ipv6,
+                                                       encap);
                        break;
                case RTE_FLOW_ITEM_TYPE_UDP:
                        mask.udp = items->mask;
@@ -3358,10 +3483,35 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                         TCA_FLOWER_KEY_IPV4_DST_MASK,
                                         mask.ipv4->hdr.dst_addr);
                        }
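+                       /*
+                        * The ENC_* flower keys match the outer header of
+                        * tunneled traffic, the plain keys are used otherwise.
+                        */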
+                       if (mask.ipv4->hdr.time_to_live) {
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TTL :
+                                        TCA_FLOWER_KEY_IP_TTL,
+                                        spec.ipv4->hdr.time_to_live);
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TTL_MASK :
+                                        TCA_FLOWER_KEY_IP_TTL_MASK,
+                                        mask.ipv4->hdr.time_to_live);
+                       }
+                       if (mask.ipv4->hdr.type_of_service) {
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TOS :
+                                        TCA_FLOWER_KEY_IP_TOS,
+                                        spec.ipv4->hdr.type_of_service);
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TOS_MASK :
+                                        TCA_FLOWER_KEY_IP_TOS_MASK,
+                                        mask.ipv4->hdr.type_of_service);
+                       }
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6: {
                        bool ipv6_src, ipv6_dst;
+                       uint8_t msk6, tos6;
 
                        item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
                                      MLX5_FLOW_LAYER_INNER_L3_IPV6 :
@@ -3447,6 +3597,33 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                             IPV6_ADDR_LEN,
                                             mask.ipv6->hdr.dst_addr);
                        }
+                       if (mask.ipv6->hdr.hop_limits) {
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TTL :
+                                        TCA_FLOWER_KEY_IP_TTL,
+                                        spec.ipv6->hdr.hop_limits);
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TTL_MASK :
+                                        TCA_FLOWER_KEY_IP_TTL_MASK,
+                                        mask.ipv6->hdr.hop_limits);
+                       }
+                       msk6 = (rte_be_to_cpu_32(mask.ipv6->hdr.vtc_flow) >>
+                               IPV6_HDR_TC_SHIFT) & 0xff;
+                       if (msk6) {
+                               tos6 = (rte_be_to_cpu_32
+                                       (spec.ipv6->hdr.vtc_flow) >>
+                                               IPV6_HDR_TC_SHIFT) & 0xff;
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TOS :
+                                        TCA_FLOWER_KEY_IP_TOS, tos6);
+                               mnl_attr_put_u8
+                                       (nlh, tunnel_outer ?
+                                        TCA_FLOWER_KEY_ENC_IP_TOS_MASK :
+                                        TCA_FLOWER_KEY_IP_TOS_MASK, msk6);
+                       }
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                }
@@ -3799,6 +3976,14 @@ override_na_vlan_priority:
                                         TCA_TUNNEL_KEY_ENC_IPV6_DST,
                                         sizeof(encap.vxlan->ipv6.dst),
                                         &encap.vxlan->ipv6.dst);
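+                       /* Set outer TOS/TTL only when requested by the flow. */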
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IP_TTL)
+                               mnl_attr_put_u8(nlh,
+                                        TCA_TUNNEL_KEY_ENC_TTL,
+                                        encap.vxlan->ip_ttl_hop);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IP_TOS)
+                               mnl_attr_put_u8(nlh,
+                                        TCA_TUNNEL_KEY_ENC_TOS,
+                                        encap.vxlan->ip_tos);
                        if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
                                mnl_attr_put_u32(nlh,
                                         TCA_TUNNEL_KEY_ENC_KEY_ID,
@@ -4912,11 +5097,6 @@ flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf,
  *
  * @param[in] tcf
  *   Context object initialized by mlx5_flow_tcf_context_create().
- * @param[in] ifouter
- *   Outer interface to attach new-created VXLAN device
- *   If zero the VXLAN device will not be attached to any device.
- *   These VTEPs are used for decapsulation and can be precreated
- *   and shared between processes.
  * @param[in] port
  *   UDP port of created VTEP device.
  * @param[out] error
@@ -4926,10 +5106,8 @@ flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf,
  * Pointer to created device structure on success,
  * NULL otherwise and rte_errno is set.
  */
-#ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA
 static struct tcf_vtep*
 flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
-                    unsigned int ifouter,
                     uint16_t port, struct rte_flow_error *error)
 {
        struct tcf_vtep *vtep;
@@ -4959,8 +5137,6 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
        }
        *vtep = (struct tcf_vtep){
                        .port = port,
-                       .local = LIST_HEAD_INITIALIZER(),
-                       .neigh = LIST_HEAD_INITIALIZER(),
        };
        memset(buf, 0, sizeof(buf));
        nlh = mnl_nlmsg_put_header(buf);
@@ -4978,13 +5154,25 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
        assert(na_info);
        mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "vxlan");
        na_vxlan = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
-       if (ifouter)
-               mnl_attr_put_u32(nlh, IFLA_VXLAN_LINK, ifouter);
        assert(na_vxlan);
+#ifdef HAVE_IFLA_VXLAN_COLLECT_METADATA
+       /*
+        * RH 7.2 does not support metadata for the tunnel device.
+        * This does not matter because the rules are offloaded to
+        * hardware by the mlx5 driver anyway.
+        */
        mnl_attr_put_u8(nlh, IFLA_VXLAN_COLLECT_METADATA, 1);
+#endif
        mnl_attr_put_u8(nlh, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
        mnl_attr_put_u8(nlh, IFLA_VXLAN_LEARNING, 0);
        mnl_attr_put_u16(nlh, IFLA_VXLAN_PORT, vxlan_port);
+#ifndef HAVE_IFLA_VXLAN_COLLECT_METADATA
+       /*
+        * We must specify the VNI explicitly if metadata is not
+        * supported. Note, IFLA_VXLAN_ID is a 32-bit attribute
+        * transferred in host byte order.
+        */
+       mnl_attr_put_u32(nlh, IFLA_VXLAN_ID, MLX5_VXLAN_DEFAULT_VNI);
+#endif
        mnl_attr_nest_end(nlh, na_vxlan);
        mnl_attr_nest_end(nlh, na_info);
        assert(sizeof(buf) >= nlh->nlmsg_len);
@@ -4993,7 +5181,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
                DRV_LOG(WARNING,
                        "netlink: VTEP %s create failure (%d)",
                        name, rte_errno);
-               if (rte_errno != EEXIST || ifouter)
+               if (rte_errno != EEXIST)
-                       /*
-                        * Some unhandled error occurred or device is
-                        * for encapsulation and cannot be shared.
-                        */
+                       /* Some unhandled error occurred. */
@@ -5019,7 +5207,6 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
                goto error;
        }
        vtep->ifindex = ret;
-       vtep->ifouter = ifouter;
        memset(buf, 0, sizeof(buf));
        nlh = mnl_nlmsg_put_header(buf);
        nlh->nlmsg_type = RTM_NEWLINK;
@@ -5054,20 +5241,6 @@ error:
        rte_free(vtep);
        return NULL;
 }
-#else
-static struct tcf_vtep*
-flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf __rte_unused,
-                    unsigned int ifouter __rte_unused,
-                    uint16_t port __rte_unused,
-                    struct rte_flow_error *error)
-{
-       rte_flow_error_set(error, ENOTSUP,
-                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                          "netlink: failed to create VTEP, "
-                          "vxlan metadata are not supported by kernel");
-       return NULL;
-}
-#endif /* HAVE_IFLA_VXLAN_COLLECT_METADATA */
 
 /**
  * Acquire target interface index for VXLAN tunneling decapsulation.
@@ -5096,13 +5269,6 @@ flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
                if (vtep->port == port)
                        break;
        }
-       if (vtep && vtep->ifouter) {
-               rte_flow_error_set(error, -errno,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                                  "Failed to create decap VTEP with specified"
-                                  " UDP port, atatched device exists");
-               return NULL;
-       }
        if (vtep) {
                /* Device exists, just increment the reference counter. */
                vtep->refcnt++;
@@ -5110,7 +5276,7 @@ flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
                return vtep;
        }
        /* No decapsulation device exists, try to create the new one. */
-       vtep = flow_tcf_vtep_create(tcf, 0, port, error);
+       vtep = flow_tcf_vtep_create(tcf, port, error);
        if (vtep)
                LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
        return vtep;
@@ -5134,60 +5300,31 @@ flow_tcf_decap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
 static struct tcf_vtep*
 flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
                            unsigned int ifouter,
-                           struct mlx5_flow *dev_flow __rte_unused,
+                           struct mlx5_flow *dev_flow,
                            struct rte_flow_error *error)
 {
-       static uint16_t encap_port = MLX5_VXLAN_PORT_MIN - 1;
+       uint16_t port;
        struct tcf_vtep *vtep;
        struct tcf_irule *iface;
        int ret;
 
        assert(ifouter);
-       /* Look whether the attached VTEP for encap is created. */
+       /* Look whether the VTEP for the specified port is created. */
+       port = rte_be_to_cpu_16(dev_flow->tcf.vxlan_encap->udp.dst);
        LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
-               if (vtep->ifouter == ifouter)
+               if (vtep->port == port)
                        break;
        }
        if (vtep) {
                /* VTEP already exists, just increment the reference. */
                vtep->refcnt++;
        } else {
-               uint16_t pcnt;
-
-               /* Not found, we should create the new attached VTEP. */
-               flow_tcf_encap_iface_cleanup(tcf, ifouter);
-               flow_tcf_encap_local_cleanup(tcf, ifouter);
-               flow_tcf_encap_neigh_cleanup(tcf, ifouter);
-               for (pcnt = 0; pcnt <= (MLX5_VXLAN_PORT_MAX
-                                    - MLX5_VXLAN_PORT_MIN); pcnt++) {
-                       encap_port++;
-                       /* Wraparound the UDP port index. */
-                       if (encap_port < MLX5_VXLAN_PORT_MIN ||
-                           encap_port > MLX5_VXLAN_PORT_MAX)
-                               encap_port = MLX5_VXLAN_PORT_MIN;
-                       /* Check whether UDP port is in already in use. */
-                       LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
-                               if (vtep->port == encap_port)
-                                       break;
-                       }
-                       if (vtep) {
-                               /* Port is in use, try the next one. */
-                               vtep = NULL;
-                               continue;
-                       }
-                       vtep = flow_tcf_vtep_create(tcf, ifouter,
-                                                   encap_port, error);
-                       if (vtep) {
-                               LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
-                               break;
-                       }
-                       if (rte_errno != EEXIST)
-                               break;
-               }
+               /* Not found, create a new VTEP. */
+               vtep = flow_tcf_vtep_create(tcf, port, error);
                if (!vtep)
                        return NULL;
+               LIST_INSERT_HEAD(&vtep_list_vxlan, vtep, next);
        }
-       assert(vtep->ifouter == ifouter);
        assert(vtep->ifindex);
        iface = flow_tcf_encap_irule_acquire(tcf, ifouter, error);
        if (!iface) {
@@ -5222,7 +5359,7 @@ flow_tcf_encap_vtep_acquire(struct mlx5_flow_tcf_context *tcf,
  * @param[in] tcf
  *   Context object initialized by mlx5_flow_tcf_context_create().
  * @param[in] ifouter
- *   Network interface index to attach VXLAN encap device to.
+ *   Network interface index to create VXLAN encap rules on.
  * @param[in] dev_flow
  *   Flow tcf object with tunnel structure pointer set.
  * @param[out] error