net/mlx5: add VXLAN to flow translate routine
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
index 194910b..ba17806 100644 (file)
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/pkt_cls.h>
@@ -26,6 +27,7 @@
 #include <rte_ether.h>
 #include <rte_flow.h>
 #include <rte_malloc.h>
+#include <rte_common.h>
 
 #include "mlx5.h"
 #include "mlx5_flow.h"
@@ -53,6 +55,97 @@ struct tc_vlan {
 
 #endif /* HAVE_TC_ACT_VLAN */
 
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_PEDIT */
+
+enum {
+       TCA_PEDIT_UNSPEC,
+       TCA_PEDIT_TM,
+       TCA_PEDIT_PARMS,
+       TCA_PEDIT_PAD,
+       TCA_PEDIT_PARMS_EX,
+       TCA_PEDIT_KEYS_EX,
+       TCA_PEDIT_KEY_EX,
+       __TCA_PEDIT_MAX
+};
+
+enum {
+       TCA_PEDIT_KEY_EX_HTYPE = 1,
+       TCA_PEDIT_KEY_EX_CMD = 2,
+       __TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+       TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+       __PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+       TCA_PEDIT_KEY_EX_CMD_SET = 0,
+       TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+       __PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+       __u32 mask; /* AND */
+       __u32 val; /* XOR */
+       __u32 off; /* offset */
+       __u32 at;
+       __u32 offmask;
+       __u32 shift;
+};
+
+__extension__
+struct tc_pedit_sel {
+       tc_gen;
+       unsigned char nkeys;
+       unsigned char flags;
+       struct tc_pedit_key keys[0];
+};
+
+#endif /* HAVE_TC_ACT_PEDIT */
+
+#ifdef HAVE_TC_ACT_TUNNEL_KEY
+
+#include <linux/tc_act/tc_tunnel_key.h>
+
+#ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
+#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
+#endif
+
+#ifndef HAVE_TCA_TUNNEL_KEY_NO_CSUM
+#define TCA_TUNNEL_KEY_NO_CSUM 10
+#endif
+
+#else /* HAVE_TC_ACT_TUNNEL_KEY */
+
+#define TCA_ACT_TUNNEL_KEY 17
+#define TCA_TUNNEL_KEY_ACT_SET 1
+#define TCA_TUNNEL_KEY_ACT_RELEASE 2
+#define TCA_TUNNEL_KEY_PARMS 2
+#define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
+#define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
+#define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
+#define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
+#define TCA_TUNNEL_KEY_ENC_KEY_ID 7
+#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
+#define TCA_TUNNEL_KEY_NO_CSUM 10
+
+struct tc_tunnel_key {
+       tc_gen;
+       int t_action;
+};
+
+#endif /* HAVE_TC_ACT_TUNNEL_KEY */
+
 /* Normally found in linux/netlink.h. */
 #ifndef NETLINK_CAP_ACK
 #define NETLINK_CAP_ACK 10
@@ -67,6 +160,9 @@ struct tc_vlan {
 #ifndef TCA_CLS_FLAGS_SKIP_SW
 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
 #endif
+#ifndef HAVE_TCA_CHAIN
+#define TCA_CHAIN 11
+#endif
 #ifndef HAVE_TCA_FLOWER_ACT
 #define TCA_FLOWER_ACT 3
 #endif
@@ -148,11 +244,212 @@ struct tc_vlan {
 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
 #endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
+#define TCA_FLOWER_KEY_ENC_KEY_ID 26
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
+#define TCA_FLOWER_KEY_ENC_IPV4_DST 29
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
+#define TCA_FLOWER_KEY_ENC_IPV6_DST 33
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
+#define TCA_FLOWER_KEY_TCP_FLAGS 71
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
+#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
+#endif
+#ifndef HAVE_TC_ACT_GOTO_CHAIN
+#define TC_ACT_GOTO_CHAIN 0x20000000
+#endif
 
 #ifndef IPV6_ADDR_LEN
 #define IPV6_ADDR_LEN 16
 #endif
 
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
+#ifndef TTL_LEN
+#define TTL_LEN 1
+#endif
+
+#ifndef TCA_ACT_MAX_PRIO
+#define TCA_ACT_MAX_PRIO 32
+#endif
+
+/** UDP port range of VXLAN devices created by driver. */
+#define MLX5_VXLAN_PORT_MIN 30000
+#define MLX5_VXLAN_PORT_MAX 60000
+#define MLX5_VXLAN_DEVICE_PFX "vmlx_"
+
+/** Tunnel action type, used for @p type in header structure. */
+enum flow_tcf_tunact_type {
+       FLOW_TCF_TUNACT_VXLAN_DECAP,
+       FLOW_TCF_TUNACT_VXLAN_ENCAP,
+};
+
+/** Flags used for @p mask in tunnel action encap descriptors. */
+#define FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
+#define FLOW_TCF_ENCAP_ETH_DST (1u << 1)
+#define FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
+#define FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
+#define FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
+#define FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
+#define FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
+#define FLOW_TCF_ENCAP_UDP_DST (1u << 7)
+#define FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
+
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_flow_tcf_context {
+       struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+       uint32_t seq; /* Message sequence number. */
+       uint32_t buf_size; /* Message buffer size. */
+       uint8_t *buf; /* Message buffer. */
+};
+
+/**
+ * Neigh rule structure. The neigh rule is applied via Netlink to
+ * outer tunnel iface in order to provide destination MAC address
+ * for the VXLAN encapsulation. The neigh rule is implicitly related
+ * to the Flow itself and can be shared by multiple Flows.
+ */
+struct tcf_neigh_rule {
+       LIST_ENTRY(tcf_neigh_rule) next;
+       uint32_t refcnt;
+       struct ether_addr eth;
+       uint16_t mask;
+       union {
+               struct {
+                       rte_be32_t dst;
+               } ipv4;
+               struct {
+                       uint8_t dst[IPV6_ADDR_LEN];
+               } ipv6;
+       };
+};
+
+/**
+ * Local rule structure. The local rule is applied via Netlink to
+ * outer tunnel iface in order to provide local and peer IP addresses
+ * of the VXLAN tunnel for encapsulation. The local rule is implicitly
+ * related to the Flow itself and can be shared by multiple Flows.
+ */
+struct tcf_local_rule {
+       LIST_ENTRY(tcf_local_rule) next;
+       uint32_t refcnt;
+       uint16_t mask;
+       union {
+               struct {
+                       rte_be32_t dst;
+                       rte_be32_t src;
+               } ipv4;
+               struct {
+                       uint8_t dst[IPV6_ADDR_LEN];
+                       uint8_t src[IPV6_ADDR_LEN];
+               } ipv6;
+       };
+};
+
+/** VXLAN virtual netdev. */
+struct tcf_vtep {
+       LIST_ENTRY(tcf_vtep) next;
+       LIST_HEAD(, tcf_neigh_rule) neigh;
+       LIST_HEAD(, tcf_local_rule) local;
+       uint32_t refcnt;
+       unsigned int ifindex; /**< Own interface index. */
+       unsigned int ifouter; /**< Index of device attached to. */
+       uint16_t port;
+       uint8_t created;
+};
+
+/** Tunnel descriptor header, common for all tunnel types. */
+struct flow_tcf_tunnel_hdr {
+       uint32_t type; /**< Tunnel action type. */
+       struct tcf_vtep *vtep; /**< Virtual tunnel endpoint device. */
+       unsigned int ifindex_org; /**< Original dst/src interface */
+       unsigned int *ifindex_ptr; /**< Interface ptr in message. */
+};
+
+struct flow_tcf_vxlan_decap {
+       struct flow_tcf_tunnel_hdr hdr;
+       uint16_t udp_port;
+};
+
+struct flow_tcf_vxlan_encap {
+       struct flow_tcf_tunnel_hdr hdr;
+       uint32_t mask;
+       struct {
+               struct ether_addr dst;
+               struct ether_addr src;
+       } eth;
+       union {
+               struct {
+                       rte_be32_t dst;
+                       rte_be32_t src;
+               } ipv4;
+               struct {
+                       uint8_t dst[IPV6_ADDR_LEN];
+                       uint8_t src[IPV6_ADDR_LEN];
+               } ipv6;
+       };
+struct {
+               rte_be16_t src;
+               rte_be16_t dst;
+       } udp;
+       struct {
+               uint8_t vni[3];
+       } vxlan;
+};
+
+/** Structure used when extracting the values of flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+       bool valid;
+       struct gnet_stats_basic counters;
+};
+
 /** Empty masks for known item types. */
 static const union {
        struct rte_flow_item_port_id port_id;
@@ -162,6 +459,7 @@ static const union {
        struct rte_flow_item_ipv6 ipv6;
        struct rte_flow_item_tcp tcp;
        struct rte_flow_item_udp udp;
+       struct rte_flow_item_vxlan vxlan;
 } flow_tcf_mask_empty;
 
 /** Supported masks for known item types. */
@@ -173,6 +471,7 @@ static const struct {
        struct rte_flow_item_ipv6 ipv6;
        struct rte_flow_item_tcp tcp;
        struct rte_flow_item_udp udp;
+       struct rte_flow_item_vxlan vxlan;
 } flow_tcf_mask_supported = {
        .port_id = {
                .id = 0xffffffff,
@@ -204,11 +503,15 @@ static const struct {
        .tcp.hdr = {
                .src_port = RTE_BE16(0xffff),
                .dst_port = RTE_BE16(0xffff),
+               .tcp_flags = 0xff,
        },
        .udp.hdr = {
                .src_port = RTE_BE16(0xffff),
                .dst_port = RTE_BE16(0xffff),
        },
+       .vxlan = {
+              .vni = "\xff\xff\xff",
+       },
 };
 
 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
@@ -225,7 +528,418 @@ struct flow_tcf_ptoi {
        unsigned int ifindex; /**< Network interface index. */
 };
 
-#define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
+/* Due to a limitation on driver/FW. */
+#define MLX5_TCF_GROUP_ID_MAX 3
+#define MLX5_TCF_GROUP_PRIORITY_MAX 14
+
+#define MLX5_TCF_FATE_ACTIONS \
+       (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
+        MLX5_FLOW_ACTION_JUMP)
+
+#define MLX5_TCF_VLAN_ACTIONS \
+       (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+        MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
+
+#define MLX5_TCF_VXLAN_ACTIONS \
+       (MLX5_FLOW_ACTION_VXLAN_ENCAP | MLX5_FLOW_ACTION_VXLAN_DECAP)
+
+#define MLX5_TCF_PEDIT_ACTIONS \
+       (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
+        MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
+        MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
+        MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
+        MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS \
+       (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
+        MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
+        MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+        (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
+
+#define MAX_PEDIT_KEYS 128
+#define SZ_PEDIT_KEY_VAL 4
+
+#define NUM_OF_PEDIT_KEYS(sz) \
+       (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
+
+struct pedit_key_ex {
+       enum pedit_header_type htype;
+       enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+       struct tc_pedit_sel sel;
+       struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+       struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * The routine takes no parameters; the counter is allocated with
+ * rte_calloc() and initialized with a reference count of 1.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set to ENOMEM.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+       struct mlx5_flow_counter *cnt;
+
+       /*
+        * E-Switch counter cannot be shared and its ID is unknown.
+        * Currently all counters are returned with ID 0.
+        * In the future it may be better to switch to unique numbers.
+        */
+       struct mlx5_flow_counter tmpl = {
+               .ref_cnt = 1,
+       };
+       cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+       if (!cnt) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+       *cnt = tmpl;
+       /* Implicit counter, do not add to list. */
+       return cnt;
+}
+
+/**
+ * Append the two pedit keys rewriting a source or destination MAC address.
+ *
+ * @param[in] actions
+ *   Pointer to the SET_MAC_SRC/SET_MAC_DST action specification.
+ * @param[in,out] p_parser
+ *   Pointer to the pedit parser; its key count is advanced by two.
+ */
+static void
+flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
+                          struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+       uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
+                                       offsetof(struct ether_hdr, s_addr) :
+                                       offsetof(struct ether_hdr, d_addr);
+       const struct rte_flow_action_set_mac *conf =
+               (const struct rte_flow_action_set_mac *)actions->conf;
+
+       p_parser->keys[idx].off = off;
+       p_parser->keys[idx].mask = ~UINT32_MAX;
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       memcpy(&p_parser->keys[idx].val,
+               conf->mac_addr, SZ_PEDIT_KEY_VAL);
+       idx++;
+       p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
+       p_parser->keys[idx].mask = 0xFFFF0000;
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       memcpy(&p_parser->keys[idx].val,
+               conf->mac_addr + SZ_PEDIT_KEY_VAL,
+               ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Append the pedit key that sets or decrements the IPv4 TTL/IPv6 hop limit.
+ *
+ * @param[in] actions
+ *   Pointer to the SET_TTL/DEC_TTL action specification.
+ * @param[in,out] p_parser
+ *   Pointer to the pedit parser; its key count is advanced by one.
+ * @param[in] item_flags
+ *   Flags of all items presented, used to select the IPv4 or IPv6 header.
+ */
+static void
+flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
+                               struct pedit_parser *p_parser,
+                               uint64_t item_flags)
+{
+       int idx = p_parser->sel.nkeys;
+
+       p_parser->keys[idx].mask = 0xFFFFFF00;
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+               p_parser->keys[idx].off =
+                       offsetof(struct ipv4_hdr, time_to_live);
+       }
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+               p_parser->keys[idx].off =
+                       offsetof(struct ipv6_hdr, hop_limits);
+       }
+       if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
+               p_parser->keys[idx].val = 0x000000FF;
+       } else {
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+               p_parser->keys[idx].val =
+                       (__u32)((const struct rte_flow_action_set_ttl *)
+                        actions->conf)->ttl_value;
+       }
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Append the pedit key that sets the transport (TCP/UDP) port value.
+ *
+ * @param[in] actions
+ *   Pointer to the SET_TP_SRC/SET_TP_DST action specification.
+ * @param[in,out] p_parser
+ *   Pointer to the pedit parser; its key count is advanced by one.
+ * @param[in] item_flags
+ *   Flags of all items presented, used to select the UDP or TCP header.
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+                               struct pedit_parser *p_parser,
+                               uint64_t item_flags)
+{
+       int idx = p_parser->sel.nkeys;
+
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       /* Offset of src/dst port is the same for TCP and UDP. */
+       p_parser->keys[idx].off =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+               offsetof(struct tcp_hdr, src_port) :
+               offsetof(struct tcp_hdr, dst_port);
+       p_parser->keys[idx].mask = 0xFFFF0000;
+       p_parser->keys[idx].val =
+               (__u32)((const struct rte_flow_action_set_tp *)
+                               actions->conf)->port;
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Append the pedit keys rewriting an IPv6 address, one key per 4 bytes.
+ *
+ * @param[in] actions
+ *   Pointer to the SET_IPV6_SRC/SET_IPV6_DST action specification.
+ * @param[in,out] p_parser
+ *   Pointer to the pedit parser; its key count grows by the keys added.
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+                                struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+       int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+       int off_base =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+               offsetof(struct ipv6_hdr, src_addr) :
+               offsetof(struct ipv6_hdr, dst_addr);
+       const struct rte_flow_action_set_ipv6 *conf =
+               (const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+       for (int i = 0; i < keys; i++, idx++) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+               p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+               p_parser->keys[idx].mask = ~UINT32_MAX;
+               memcpy(&p_parser->keys[idx].val,
+                       conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+                       SZ_PEDIT_KEY_VAL);
+       }
+       p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Append the pedit key rewriting an IPv4 source or destination address.
+ *
+ * @param[in] actions
+ *   Pointer to the SET_IPV4_SRC/SET_IPV4_DST action specification.
+ * @param[in,out] p_parser
+ *   Pointer to the pedit parser; its key count is advanced by one.
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+                                struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       p_parser->keys[idx].off =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+               offsetof(struct ipv4_hdr, src_addr) :
+               offsetof(struct ipv4_hdr, dst_addr);
+       p_parser->keys[idx].mask = ~UINT32_MAX;
+       p_parser->keys[idx].val =
+               ((const struct rte_flow_action_set_ipv4 *)
+                actions->conf)->ipv4_addr;
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit action attributes in the Netlink message
+ * on the pre-allocated message buffer.
+ *
+ * @param[in,out] nl
+ *   Pointer to the pre-allocated Netlink message buffer.
+ * @param[in,out] actions
+ *   Pointer to pointer of actions specification. Consecutive header
+ *   modify actions are consumed; on return the pointer is left on the
+ *   last action consumed by the loop.
+ * @param[in] item_flags
+ *   Flags of all items presented.
+ */
+static void
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+                             const struct rte_flow_action **actions,
+                             uint64_t item_flags)
+{
+       struct pedit_parser p_parser;
+       struct nlattr *na_act_options;
+       struct nlattr *na_pedit_keys;
+
+       memset(&p_parser, 0, sizeof(p_parser));
+       mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+       na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
+       /* All modify header actions should be in one tc-pedit action. */
+       for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+               switch ((*actions)->type) {
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       flow_tcf_pedit_key_set_tp_port(*actions,
+                                                       &p_parser, item_flags);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       flow_tcf_pedit_key_set_dec_ttl(*actions,
+                                                       &p_parser, item_flags);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       flow_tcf_pedit_key_set_mac(*actions, &p_parser);
+                       break;
+               default:
+                       goto pedit_mnl_msg_done;
+               }
+       }
+pedit_mnl_msg_done:
+       p_parser.sel.action = TC_ACT_PIPE;
+       mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+                    sizeof(p_parser.sel) +
+                    p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+                    &p_parser);
+       na_pedit_keys =
+               mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+       for (int i = 0; i < p_parser.sel.nkeys; i++) {
+               struct nlattr *na_pedit_key =
+                       mnl_attr_nest_start(nl,
+                                           TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+               mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+                                p_parser.keys_ex[i].htype);
+               mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+                                p_parser.keys_ex[i].cmd);
+               mnl_attr_nest_end(nl, na_pedit_key);
+       }
+       mnl_attr_nest_end(nl, na_pedit_keys);
+       mnl_attr_nest_end(nl, na_act_options);
+       (*actions)--;
+}
+
+/**
+ * Calculate max memory size of one TC-pedit action.
+ * One TC-pedit action can contain a set of keys, each defining
+ * a rewrite element (rte_flow action).
+ *
+ * @param[in,out] actions
+ *   Actions specification; left on the last action consumed by the loop.
+ * @param[in,out] action_flags
+ *   Actions flags; the flags of the consumed actions are OR-ed in.
+ *
+ * @return
+ *   Max memory size of one TC-pedit action; the size is returned
+ *   rather than accumulated through a parameter.
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+                               uint64_t *action_flags)
+{
+       int pedit_size = 0;
+       int keys = 0;
+       uint64_t flags = 0;
+
+       pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+                     SZ_NLATTR_STRZ_OF("pedit") +
+                     SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+       for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+               switch ((*actions)->type) {
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+                       /* TCP is the same as UDP. */
+                       keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       /* TCP is the same as UDP. */
+                       keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
+               default:
+                       goto get_pedit_action_size_done;
+               }
+       }
+get_pedit_action_size_done:
+       /* TCA_PEDIT_PARMS_EX */
+       pedit_size +=
+               SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+                                 keys * sizeof(struct tc_pedit_key));
+       pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS_EX */
+       pedit_size += keys *
+                     /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+                     (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
+                      SZ_NLATTR_DATA_OF(2));
+       (*action_flags) |= flags;
+       (*actions)--;
+       return pedit_size;
+}
 
 /**
  * Retrieve mask for pattern item.
@@ -367,14 +1081,25 @@ flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
                             struct rte_flow_error *error)
 {
        /*
-        * Supported attributes: no groups, some priorities and ingress only.
-        * Don't care about transfer as it is the caller's problem.
+        * Supported attributes: groups, some priorities and ingress only.
+        * group is supported only if kernel supports chain. Don't care about
+        * transfer as it is the caller's problem.
         */
-       if (attr->group)
+       if (attr->group > MLX5_TCF_GROUP_ID_MAX)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
-                                         "groups are not supported");
-       if (attr->priority > 0xfffe)
+                                         "group ID larger than "
+                                         RTE_STR(MLX5_TCF_GROUP_ID_MAX)
+                                         " isn't supported");
+       else if (attr->group > 0 &&
+                attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                         attr,
+                                         "lowest priority level is "
+                                         RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
+                                         " when group is configured");
+       else if (attr->priority > 0xfffe)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                          attr,
@@ -391,62 +1116,889 @@ flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
 }
 
 /**
- * Validate flow for E-Switch.
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_ETH item for E-Switch.
+ * The routine checks the L2 fields to be used in encapsulation header.
+ * A NULL spec is accepted without looking at the mask. Otherwise each of
+ * the dst/src masks must equal either the matching flow_tcf_mask_empty
+ * field or the matching rte_flow_item_eth_mask field, and the type mask
+ * must be 0x0000 or 0xffff (a 0xffff type mask only logs a warning).
  *
- * @param[in] priv
- *   Pointer to the priv structure.
- * @param[in] attr
- *   Pointer to the flow attributes.
- * @param[in] items
- *   Pointer to the list of items.
- * @param[in] actions
- *   Pointer to the list of actions.
+ * @param[in] item
+ *   Pointer to the item structure.
  * @param[out] error
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
- */
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
 static int
-flow_tcf_validate(struct rte_eth_dev *dev,
-                 const struct rte_flow_attr *attr,
-                 const struct rte_flow_item items[],
-                 const struct rte_flow_action actions[],
-                 struct rte_flow_error *error)
+flow_tcf_validate_vxlan_encap_eth(const struct rte_flow_item *item,
+                                 struct rte_flow_error *error)
 {
-       union {
-               const struct rte_flow_item_port_id *port_id;
-               const struct rte_flow_item_eth *eth;
-               const struct rte_flow_item_vlan *vlan;
-               const struct rte_flow_item_ipv4 *ipv4;
-               const struct rte_flow_item_ipv6 *ipv6;
-               const struct rte_flow_item_tcp *tcp;
-               const struct rte_flow_item_udp *udp;
-       } spec, mask;
-       union {
-               const struct rte_flow_action_port_id *port_id;
-               const struct rte_flow_action_of_push_vlan *of_push_vlan;
-               const struct rte_flow_action_of_set_vlan_vid *
-                       of_set_vlan_vid;
-               const struct rte_flow_action_of_set_vlan_pcp *
-                       of_set_vlan_pcp;
-       } conf;
-       uint32_t item_flags = 0;
-       uint32_t action_flags = 0;
-       uint8_t next_protocol = -1;
-       unsigned int tcm_ifindex = 0;
-       struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
-       bool in_port_id_set;
-       int ret;
+       const struct rte_flow_item_eth *spec = item->spec;
+       const struct rte_flow_item_eth *mask = item->mask;
 
-       claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
-                                               PTOI_TABLE_SZ_MAX(dev)));
-       ret = flow_tcf_validate_attributes(attr, error);
-       if (ret < 0)
-               return ret;
+       if (!spec) {
+               /*
+                * Specification for L2 addresses can be empty
+                * because these ones are optional and not
+                * required directly by tc rule. Kernel tries
+                * to resolve these ones on its own, so the
+                * mask is not inspected at all in this case.
+                */
+               return 0;
+       }
+       if (!mask) {
+               /* If mask is not specified use the default one. */
+               mask = &rte_flow_item_eth_mask;
+       }
+       if (memcmp(&mask->dst,
+                  &flow_tcf_mask_empty.eth.dst,
+                  sizeof(flow_tcf_mask_empty.eth.dst))) {
+               if (memcmp(&mask->dst,
+                          &rte_flow_item_eth_mask.dst,
+                          sizeof(rte_flow_item_eth_mask.dst)))
+                       return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                "no support for partial mask on"
+                                " \"eth.dst\" field");
+       }
+       if (memcmp(&mask->src,
+                  &flow_tcf_mask_empty.eth.src,
+                  sizeof(flow_tcf_mask_empty.eth.src))) {
+               if (memcmp(&mask->src,
+                          &rte_flow_item_eth_mask.src,
+                          sizeof(rte_flow_item_eth_mask.src)))
+                       return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                "no support for partial mask on"
+                                " \"eth.src\" field");
+       }
+       if (mask->type != RTE_BE16(0x0000)) {
+               if (mask->type != RTE_BE16(0xffff))
+                       return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                "no support for partial mask on"
+                                " \"eth.type\" field");
+               DRV_LOG(WARNING,
+                       "outer ethernet type field"
+                       " cannot be forced for vxlan"
+                       " encapsulation, parameter ignored");
+       }
+       return 0;
+}
+
+/**
+ * Check the RTE_FLOW_ITEM_TYPE_IPV4 item of a VXLAN_ENCAP action
+ * definition for E-Switch. Both the source and the destination address
+ * of the outer header must be provided and fully masked.
+ *
+ * @param[in] item
+ *   Pointer to the item structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
+                                  struct rte_flow_error *error)
+{
+       const struct rte_flow_item_ipv4 *ipv4_mask;
+
+       /* IP addresses are mandatory input for the tunnel_key parameter. */
+       if (!item->spec)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "NULL outer ipv4 address"
+                                         " specification for vxlan"
+                                         " encapsulation");
+       /* Fall back to the default mask when the item carries none. */
+       ipv4_mask = item->mask ? item->mask : &rte_flow_item_ipv4_mask;
+       /*
+        * The kernel derives the routing path and the destination MAC
+        * address from the destination IP address, hence the tc rule
+        * cannot be built without it.
+        */
+       if (ipv4_mask->hdr.dst_addr == RTE_BE32(0x00000000))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer ipv4 destination address"
+                                         " must be specified for"
+                                         " vxlan encapsulation");
+       if (ipv4_mask->hdr.dst_addr != RTE_BE32(0xffffffff))
+               return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, ipv4_mask,
+                                "no support for partial mask on"
+                                " \"ipv4.hdr.dst_addr\" field"
+                                " for vxlan encapsulation");
+       /*
+        * The kernel picks the egress interface for encapsulated
+        * traffic from the source IP address, hence it is mandatory
+        * in the tc rule as well.
+        */
+       if (ipv4_mask->hdr.src_addr == RTE_BE32(0x00000000))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer ipv4 source address"
+                                         " must be specified for"
+                                         " vxlan encapsulation");
+       if (ipv4_mask->hdr.src_addr != RTE_BE32(0xffffffff))
+               return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, ipv4_mask,
+                                "no support for partial mask on"
+                                " \"ipv4.hdr.src_addr\" field"
+                                " for vxlan encapsulation");
+       /* Further IPv4 address validations can be added here. */
+       return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_IPV6 item for E-Switch.
+ * The routine checks the IPv6 fields to be used in encapsulation header:
+ * both the source and the destination address must be specified with
+ * a full mask.
+ *
+ * @param[in] item
+ *   Pointer to the item structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
+                                  struct rte_flow_error *error)
+{
+       const struct rte_flow_item_ipv6 *spec = item->spec;
+       const struct rte_flow_item_ipv6 *mask = item->mask;
+
+       if (!spec) {
+               /*
+                * Specification for IP addresses cannot be empty
+                * because it is required by tunnel_key parameter.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "NULL outer ipv6 address"
+                                         " specification for"
+                                         " vxlan encapsulation");
+       }
+       /* If mask is not specified use the default one. */
+       if (!mask)
+               mask = &rte_flow_item_ipv6_mask;
+       if (memcmp(&mask->hdr.dst_addr,
+                  &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
+                  IPV6_ADDR_LEN)) {
+               if (memcmp(&mask->hdr.dst_addr,
+                          &rte_flow_item_ipv6_mask.hdr.dst_addr,
+                          IPV6_ADDR_LEN))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"ipv6.hdr.dst_addr\" field"
+                                        " for vxlan encapsulation");
+               /* More IPv6 address validations can be put here. */
+       } else {
+               /*
+                * Kernel uses the destination IP address to determine
+                * the routing path and obtain the MAC destination
+                * address (neighbour or gateway), so IP destination
+                * address must be specified within the tc rule.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer ipv6 destination address"
+                                         " must be specified for"
+                                         " vxlan encapsulation");
+       }
+       if (memcmp(&mask->hdr.src_addr,
+                  &flow_tcf_mask_empty.ipv6.hdr.src_addr,
+                  IPV6_ADDR_LEN)) {
+               if (memcmp(&mask->hdr.src_addr,
+                          &rte_flow_item_ipv6_mask.hdr.src_addr,
+                          IPV6_ADDR_LEN))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"ipv6.hdr.src_addr\" field"
+                                        " for vxlan encapsulation");
+               /* More IPv6 address validations can be put here. */
+       } else {
+               /*
+                * Kernel uses the source IP address to select the
+                * interface for egress encapsulated traffic, so
+                * it must be specified in the tc rule.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer ipv6 source address"
+                                         " must be specified for"
+                                         " vxlan encapsulation");
+       }
+       return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action RTE_FLOW_ITEM_TYPE_UDP item for E-Switch.
+ * The routine checks the UDP fields to be used in encapsulation header:
+ * the destination port must be fully masked and non-zero, while a fully
+ * masked source port is accepted but only reported with a warning.
+ *
+ * @param[in] item
+ *   Pointer to the item structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
+                                 struct rte_flow_error *error)
+{
+       const struct rte_flow_item_udp *spec = item->spec;
+       const struct rte_flow_item_udp *mask = item->mask;
+
+       if (!spec) {
+               /*
+                * Specification for UDP ports cannot be empty
+                * because it is required by tunnel_key parameter.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "NULL UDP port specification"
+                                         " for vxlan encapsulation");
+       }
+       /* If mask is not specified use the default one. */
+       if (!mask)
+               mask = &rte_flow_item_udp_mask;
+       if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
+               if (mask->hdr.dst_port != RTE_BE16(0xffff))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"udp.hdr.dst_port\" field"
+                                        " for vxlan encapsulation");
+               if (!spec->hdr.dst_port)
+                       return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                        "outer UDP remote port cannot be"
+                                        " 0 for vxlan encapsulation");
+       } else {
+               /* An unmasked destination port counts as not given. */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer UDP remote port"
+                                         " must be specified for"
+                                         " vxlan encapsulation");
+       }
+       if (mask->hdr.src_port != RTE_BE16(0x0000)) {
+               if (mask->hdr.src_port != RTE_BE16(0xffff))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"udp.hdr.src_port\" field"
+                                        " for vxlan encapsulation");
+               /* The source port is accepted but not applied. */
+               DRV_LOG(WARNING,
+                       "outer UDP source port cannot be"
+                       " forced for vxlan encapsulation,"
+                       " parameter ignored");
+       }
+       return 0;
+}
+
+/**
+ * Check the RTE_FLOW_ITEM_TYPE_VXLAN item of a VXLAN_ENCAP action
+ * definition for E-Switch. The VNI to be put into the encapsulation
+ * header must be provided, fully masked and non-zero.
+ *
+ * @param[in] item
+ *   Pointer to the item structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
+                                 struct rte_flow_error *error)
+{
+       const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
+       const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
+
+       /* The tunnel_key parameter cannot be built without a VNI. */
+       if (vxlan_spec == NULL)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "NULL VNI specification"
+                                         " for vxlan encapsulation");
+       if (vxlan_mask == NULL)
+               vxlan_mask = &rte_flow_item_vxlan_mask;
+       /* An all-zero mask means the VNI is not actually given. */
+       if (!(vxlan_mask->vni[0] || vxlan_mask->vni[1] ||
+             vxlan_mask->vni[2]))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "outer VNI must be specified "
+                                         "for vxlan encapsulation");
+       /* Anything but a full mask on all three bytes is rejected. */
+       if (!(vxlan_mask->vni[0] == 0xff &&
+             vxlan_mask->vni[1] == 0xff &&
+             vxlan_mask->vni[2] == 0xff))
+               return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, vxlan_mask,
+                                "no support for partial mask on"
+                                " \"vxlan.vni\" field");
+
+       if (!(vxlan_spec->vni[0] || vxlan_spec->vni[1] ||
+             vxlan_spec->vni[2]))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "vxlan vni cannot be 0");
+       return 0;
+}
+
+/**
+ * Validate VXLAN_ENCAP action item list for E-Switch.
+ * The routine checks items to be used in encapsulation header. Every
+ * item is first checked by the generic mlx5 item validator and then by
+ * the matching VXLAN encapsulation specific one. The definition must
+ * contain an outer IP layer, an outer UDP layer and a VXLAN item.
+ *
+ * @param[in] action
+ *   Pointer to the VXLAN_ENCAP action structure.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
+                             struct rte_flow_error *error)
+{
+       const struct rte_flow_item *items;
+       int ret;
+       uint32_t item_flags = 0;
+
+       if (!action->conf)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "Missing vxlan tunnel"
+                                         " action configuration");
+       items = ((const struct rte_flow_action_vxlan_encap *)
+                                       action->conf)->definition;
+       if (!items)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "Missing vxlan tunnel"
+                                         " encapsulation parameters");
+       /* Walk the definition and record which layers were seen. */
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       ret = mlx5_flow_validate_item_eth(items, item_flags,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       ret = flow_tcf_validate_vxlan_encap_eth(items, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       ret = flow_tcf_validate_vxlan_encap_ipv4(items, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       ret = flow_tcf_validate_vxlan_encap_ipv6(items, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       /*
+                        * NOTE(review): 0xFF is presumably the "next
+                        * protocol unknown" value - confirm against
+                        * mlx5_flow_validate_item_udp().
+                        */
+                       ret = mlx5_flow_validate_item_udp(items, item_flags,
+                                                         0xFF, error);
+                       if (ret < 0)
+                               return ret;
+                       ret = flow_tcf_validate_vxlan_encap_udp(items, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       ret = mlx5_flow_validate_item_vxlan(items,
+                                                           item_flags, error);
+                       if (ret < 0)
+                               return ret;
+                       ret = flow_tcf_validate_vxlan_encap_vni(items, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       break;
+               default:
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
+                                        "vxlan encap item not supported");
+               }
+       }
+       /* The L2 layer is optional, the remaining layers are mandatory. */
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "no outer IP layer found"
+                                         " for vxlan encapsulation");
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "no outer UDP layer found"
+                                         " for vxlan encapsulation");
+       if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
+                                         "no VXLAN VNI found"
+                                         " for vxlan encapsulation");
+       return 0;
+}
+
+/**
+ * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
+ * is present in actions list. The destination address must be fully
+ * masked; the source address is optional but, when masked, must be
+ * fully masked as well.
+ *
+ * @param[in] ipv4
+ *   Outer IPv4 address item. It is dereferenced unconditionally and
+ *   therefore must not be NULL.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
+                                  struct rte_flow_error *error)
+{
+       const struct rte_flow_item_ipv4 *spec = ipv4->spec;
+       const struct rte_flow_item_ipv4 *mask = ipv4->mask;
+
+       if (!spec) {
+               /*
+                * Specification for IP addresses cannot be empty
+                * because it is required as decap parameter.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
+                                         "NULL outer ipv4 address"
+                                         " specification for vxlan"
+                                         " decapsulation");
+       }
+       /* If mask is not specified use the default one. */
+       if (!mask)
+               mask = &rte_flow_item_ipv4_mask;
+       if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
+               if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"ipv4.hdr.dst_addr\" field");
+               /* More IP address validations can be put here. */
+       } else {
+               /*
+                * Kernel uses the destination IP address
+                * to determine the ingress network interface
+                * for traffic being decapsulated.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
+                                         "outer ipv4 destination address"
+                                         " must be specified for"
+                                         " vxlan decapsulation");
+       }
+       /* Source IP address is optional for decap. */
+       if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
+           mask->hdr.src_addr != RTE_BE32(0xffffffff))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                         "no support for partial mask on"
+                                         " \"ipv4.hdr.src_addr\" field");
+       return 0;
+}
+
+/**
+ * Check the outer RTE_FLOW_ITEM_TYPE_IPV6 pattern item of a flow whose
+ * action list contains VXLAN_DECAP. The destination address must be
+ * fully masked, the source address may be left unmasked.
+ *
+ * @param[in] ipv6
+ *   Outer IPv6 address item; it is dereferenced unconditionally.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
+                                  struct rte_flow_error *error)
+{
+       const struct rte_flow_item_ipv6 *ipv6_mask;
+
+       /* Decapsulation cannot be set up without the IP addresses. */
+       if (!ipv6->spec)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
+                                         "NULL outer ipv6 address"
+                                         " specification for vxlan"
+                                         " decapsulation");
+       /* Fall back to the default mask when the item carries none. */
+       ipv6_mask = ipv6->mask ? ipv6->mask : &rte_flow_item_ipv6_mask;
+       /*
+        * The kernel selects the ingress network interface for the
+        * traffic being decapsulated by the destination IP address.
+        */
+       if (!memcmp(&ipv6_mask->hdr.dst_addr,
+                   &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
+                   IPV6_ADDR_LEN))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
+                                         "outer ipv6 destination address must be "
+                                         "specified for vxlan decapsulation");
+       if (memcmp(&ipv6_mask->hdr.dst_addr,
+                  &rte_flow_item_ipv6_mask.hdr.dst_addr,
+                  IPV6_ADDR_LEN))
+               return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, ipv6_mask,
+                                "no support for partial mask on"
+                                " \"ipv6.hdr.dst_addr\" field");
+       /* The source address is optional, yet no partial mask is allowed. */
+       if (memcmp(&ipv6_mask->hdr.src_addr,
+                  &flow_tcf_mask_empty.ipv6.hdr.src_addr,
+                  IPV6_ADDR_LEN) &&
+           memcmp(&ipv6_mask->hdr.src_addr,
+                  &rte_flow_item_ipv6_mask.hdr.src_addr,
+                  IPV6_ADDR_LEN))
+               return rte_flow_error_set
+                               (error, ENOTSUP,
+                                RTE_FLOW_ERROR_TYPE_ITEM_MASK, ipv6_mask,
+                                "no support for partial mask on"
+                                " \"ipv6.hdr.src_addr\" field");
+       return 0;
+}
+
+/**
+ * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
+ * is present in actions list. The destination port must be fully
+ * masked and non-zero; a fully masked source port is accepted but only
+ * reported with a warning.
+ *
+ * @param[in] udp
+ *   Outer UDP layer item. It is dereferenced unconditionally and
+ *   therefore must not be NULL.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ **/
+static int
+flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
+                                 struct rte_flow_error *error)
+{
+       const struct rte_flow_item_udp *spec = udp->spec;
+       const struct rte_flow_item_udp *mask = udp->mask;
+
+       if (!spec)
+               /*
+                * Specification for UDP ports cannot be empty
+                * because it is required as decap parameter.
+                */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, udp,
+                                         "NULL UDP port specification"
+                                         " for VXLAN decapsulation");
+       /* If mask is not specified use the default one. */
+       if (!mask)
+               mask = &rte_flow_item_udp_mask;
+       if (mask->hdr.dst_port != RTE_BE16(0x0000)) {
+               if (mask->hdr.dst_port != RTE_BE16(0xffff))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"udp.hdr.dst_port\" field");
+               if (!spec->hdr.dst_port)
+                       return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, udp,
+                                        "zero decap local UDP port");
+       } else {
+               /* An unmasked destination port counts as not given. */
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, udp,
+                                         "outer UDP destination port must be "
+                                         "specified for vxlan decapsulation");
+       }
+       if (mask->hdr.src_port != RTE_BE16(0x0000)) {
+               if (mask->hdr.src_port != RTE_BE16(0xffff))
+                       return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                        "no support for partial mask on"
+                                        " \"udp.hdr.src_port\" field");
+               /* The source port is accepted but not applied. */
+               DRV_LOG(WARNING,
+                       "outer UDP source port cannot be "
+                       "forced for VXLAN decapsulation, "
+                       "parameter ignored");
+       }
+       return 0;
+}
+
+/**
+ * Validate flow for E-Switch.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate(struct rte_eth_dev *dev,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item items[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error)
+{
+       union {
+               const struct rte_flow_item_port_id *port_id;
+               const struct rte_flow_item_eth *eth;
+               const struct rte_flow_item_vlan *vlan;
+               const struct rte_flow_item_ipv4 *ipv4;
+               const struct rte_flow_item_ipv6 *ipv6;
+               const struct rte_flow_item_tcp *tcp;
+               const struct rte_flow_item_udp *udp;
+               const struct rte_flow_item_vxlan *vxlan;
+       } spec, mask;
+       union {
+               const struct rte_flow_action_port_id *port_id;
+               const struct rte_flow_action_jump *jump;
+               const struct rte_flow_action_of_push_vlan *of_push_vlan;
+               const struct rte_flow_action_of_set_vlan_vid *
+                       of_set_vlan_vid;
+               const struct rte_flow_action_of_set_vlan_pcp *
+                       of_set_vlan_pcp;
+               const struct rte_flow_action_vxlan_encap *vxlan_encap;
+               const struct rte_flow_action_set_ipv4 *set_ipv4;
+               const struct rte_flow_action_set_ipv6 *set_ipv6;
+       } conf;
+       uint64_t item_flags = 0;
+       uint64_t action_flags = 0;
+       uint8_t next_protocol = -1;
+       unsigned int tcm_ifindex = 0;
+       uint8_t pedit_validated = 0;
+       struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+       struct rte_eth_dev *port_id_dev = NULL;
+       bool in_port_id_set;
+       int ret;
+
+       claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+                                               PTOI_TABLE_SZ_MAX(dev)));
+       ret = flow_tcf_validate_attributes(attr, error);
+       if (ret < 0)
+               return ret;
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               unsigned int i;
+               uint64_t current_action_flag = 0;
+
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_PORT_ID:
+                       current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
+                       if (!actions->conf)
+                               break;
+                       conf.port_id = actions->conf;
+                       if (conf.port_id->original)
+                               i = 0;
+                       else
+                               for (i = 0; ptoi[i].ifindex; ++i)
+                                       if (ptoi[i].port_id == conf.port_id->id)
+                                               break;
+                       if (!ptoi[i].ifindex)
+                               return rte_flow_error_set
+                                       (error, ENODEV,
+                                        RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                        conf.port_id,
+                                        "missing data to convert port ID to"
+                                        " ifindex");
+                       port_id_dev = &rte_eth_devices[conf.port_id->id];
+                       break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       current_action_flag = MLX5_FLOW_ACTION_JUMP;
+                       if (!actions->conf)
+                               break;
+                       conf.jump = actions->conf;
+                       if (attr->group >= conf.jump->group)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION,
+                                        actions,
+                                        "can jump only to a group forward");
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       current_action_flag = MLX5_FLOW_ACTION_DROP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+                       current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+                       current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+                       if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                        "vlan modify is not supported,"
+                                        " set action must follow push action");
+                       current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+                       if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                        "vlan modify is not supported,"
+                                        " set action must follow push action");
+                       current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+                       current_action_flag = MLX5_FLOW_ACTION_VXLAN_DECAP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+                       ret = flow_tcf_validate_vxlan_encap(actions, error);
+                       if (ret < 0)
+                               return ret;
+                       current_action_flag = MLX5_FLOW_ACTION_VXLAN_ENCAP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+               if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+                       if (!actions->conf)
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                        actions,
+                                        "action configuration not set");
+               }
+               if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+                   pedit_validated)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "set actions should be "
+                                                 "listed successively");
+               if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+                   (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+                       pedit_validated = 1;
+               if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
+                   (action_flags & MLX5_TCF_FATE_ACTIONS))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "can't have multiple fate"
+                                                 " actions");
+               if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
+                   (action_flags & MLX5_TCF_VXLAN_ACTIONS))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "can't have multiple vxlan"
+                                                 " actions");
+               if ((current_action_flag & MLX5_TCF_VXLAN_ACTIONS) &&
+                   (action_flags & MLX5_TCF_VLAN_ACTIONS))
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "can't have vxlan and vlan"
+                                                 " actions in the same rule");
+               action_flags |= current_action_flag;
+       }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
                unsigned int i;
 
+               if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
+                   items->type != RTE_FLOW_ITEM_TYPE_ETH)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 items,
+                                                 "only L2 inner item"
+                                                 " is supported");
                switch (items->type) {
                case RTE_FLOW_ITEM_TYPE_VOID:
                        break;
@@ -500,7 +2052,9 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                                          error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                       MLX5_FLOW_LAYER_INNER_L2 :
+                                       MLX5_FLOW_LAYER_OUTER_L2;
                        /* TODO:
                         * Redundant check due to different supported mask.
                         * Same for the rest of items.
@@ -578,6 +2132,12 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                next_protocol =
                                        ((const struct rte_flow_item_ipv4 *)
                                         (items->spec))->hdr.next_proto_id;
+                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+                               ret = flow_tcf_validate_vxlan_decap_ipv4
+                                                               (items, error);
+                               if (ret < 0)
+                                       return ret;
+                       }
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
                        ret = mlx5_flow_validate_item_ipv6(items, item_flags,
@@ -604,103 +2164,206 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                        else if (mask.ipv6->hdr.proto)
                                next_protocol =
                                        ((const struct rte_flow_item_ipv6 *)
-                                        (items->spec))->hdr.proto;
-                       break;
-               case RTE_FLOW_ITEM_TYPE_UDP:
-                       ret = mlx5_flow_validate_item_udp(items, item_flags,
-                                                         next_protocol, error);
-                       if (ret < 0)
-                               return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
-                       mask.udp = flow_tcf_item_mask
-                               (items, &rte_flow_item_udp_mask,
-                                &flow_tcf_mask_supported.udp,
-                                &flow_tcf_mask_empty.udp,
-                                sizeof(flow_tcf_mask_supported.udp),
-                                error);
-                       if (!mask.udp)
-                               return -rte_errno;
-                       break;
-               case RTE_FLOW_ITEM_TYPE_TCP:
-                       ret = mlx5_flow_validate_item_tcp(items, item_flags,
-                                                         next_protocol, error);
-                       if (ret < 0)
-                               return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
-                       mask.tcp = flow_tcf_item_mask
-                               (items, &rte_flow_item_tcp_mask,
-                                &flow_tcf_mask_supported.tcp,
-                                &flow_tcf_mask_empty.tcp,
-                                sizeof(flow_tcf_mask_supported.tcp),
-                                error);
-                       if (!mask.tcp)
-                               return -rte_errno;
-                       break;
-               default:
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ITEM,
-                                                 NULL, "item not supported");
-               }
-       }
-       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-               unsigned int i;
-
-               switch (actions->type) {
-               case RTE_FLOW_ACTION_TYPE_VOID:
-                       break;
-               case RTE_FLOW_ACTION_TYPE_PORT_ID:
-                       if (action_flags & MLX5_TCF_FATE_ACTIONS)
-                               return rte_flow_error_set
-                                       (error, EINVAL,
-                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
-                                        "can't have multiple fate actions");
-                       conf.port_id = actions->conf;
-                       if (conf.port_id->original)
-                               i = 0;
-                       else
-                               for (i = 0; ptoi[i].ifindex; ++i)
-                                       if (ptoi[i].port_id == conf.port_id->id)
-                                               break;
-                       if (!ptoi[i].ifindex)
-                               return rte_flow_error_set
-                                       (error, ENODEV,
-                                        RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                        conf.port_id,
-                                        "missing data to convert port ID to"
-                                        " ifindex");
-                       action_flags |= MLX5_FLOW_ACTION_PORT_ID;
-                       break;
-               case RTE_FLOW_ACTION_TYPE_DROP:
-                       if (action_flags & MLX5_TCF_FATE_ACTIONS)
-                               return rte_flow_error_set
-                                       (error, EINVAL,
-                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
-                                        "can't have multiple fate actions");
-                       action_flags |= MLX5_FLOW_ACTION_DROP;
-                       break;
-               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
-                       action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+                                        (items->spec))->hdr.proto;
+                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+                               ret = flow_tcf_validate_vxlan_decap_ipv6
+                                                               (items, error);
+                               if (ret < 0)
+                                       return ret;
+                       }
                        break;
-               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
-                       action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       ret = mlx5_flow_validate_item_udp(items, item_flags,
+                                                         next_protocol, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       mask.udp = flow_tcf_item_mask
+                               (items, &rte_flow_item_udp_mask,
+                                &flow_tcf_mask_supported.udp,
+                                &flow_tcf_mask_empty.udp,
+                                sizeof(flow_tcf_mask_supported.udp),
+                                error);
+                       if (!mask.udp)
+                               return -rte_errno;
+                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+                               ret = flow_tcf_validate_vxlan_decap_udp
+                                                               (items, error);
+                               if (ret < 0)
+                                       return ret;
+                       }
                        break;
-               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
-                       action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       ret = mlx5_flow_validate_item_tcp
+                                            (items, item_flags,
+                                             next_protocol,
+                                             &flow_tcf_mask_supported.tcp,
+                                             error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       mask.tcp = flow_tcf_item_mask
+                               (items, &rte_flow_item_tcp_mask,
+                                &flow_tcf_mask_supported.tcp,
+                                &flow_tcf_mask_empty.tcp,
+                                sizeof(flow_tcf_mask_supported.tcp),
+                                error);
+                       if (!mask.tcp)
+                               return -rte_errno;
                        break;
-               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
-                       action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM,
+                                        items,
+                                        "vni pattern should be followed by"
+                                        " vxlan decapsulation action");
+                       ret = mlx5_flow_validate_item_vxlan(items,
+                                                           item_flags, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       mask.vxlan = flow_tcf_item_mask
+                               (items, &rte_flow_item_vxlan_mask,
+                                &flow_tcf_mask_supported.vxlan,
+                                &flow_tcf_mask_empty.vxlan,
+                                sizeof(flow_tcf_mask_supported.vxlan), error);
+                       if (!mask.vxlan)
+                               return -rte_errno;
+                       if (mask.vxlan->vni[0] != 0xff ||
+                           mask.vxlan->vni[1] != 0xff ||
+                           mask.vxlan->vni[2] != 0xff)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.vxlan,
+                                        "no support for partial or "
+                                        "empty mask on \"vxlan.vni\" field");
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 items, "item not supported");
+               }
+       }
+       if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+           (action_flags & MLX5_FLOW_ACTION_DROP))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         actions,
+                                         "set action is not compatible with "
+                                         "drop action");
+       if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+           !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         actions,
+                                         "set action must be followed by "
+                                         "port_id action");
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+                       return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
                                                  actions,
-                                                 "action not supported");
-               }
+                                                 "no ipv4 item found in"
+                                                 " pattern");
+       }
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ipv6 item found in"
+                                                 " pattern");
+       }
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+                     MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no TCP/UDP item found in"
+                                                 " pattern");
        }
+       /*
+        * FW syndrome (0xA9C090):
+        *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
+        *     forward to the uplink.
+        */
+       if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
+           (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
+           ((struct priv *)port_id_dev->data->dev_private)->representor)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "vlan push can only be applied"
+                                         " when forwarding to uplink port");
+       /*
+        * FW syndrome (0x294609):
+        *     set_flow_table_entry: modify/pop/push actions in fdb flow table
+        *     are supported only while forwarding to vport.
+        */
+       if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
+           !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "vlan actions are supported"
+                                         " only with port_id action");
+       if ((action_flags & MLX5_TCF_VXLAN_ACTIONS) &&
+           !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "vxlan actions are supported"
+                                         " only with port_id action");
        if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
                                          "no fate action is found");
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+                     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no IP found in pattern");
+       }
+       if (action_flags &
+           (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ethernet found in"
+                                                 " pattern");
+       }
+       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+                     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "no outer IP pattern found"
+                                                 " for vxlan decap action");
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "no outer UDP pattern found"
+                                                 " for vxlan decap action");
+               if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "no VNI pattern found"
+                                                 " for vxlan decap action");
+       }
        return 0;
 }
 
@@ -717,7 +2380,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
  *   Maximum size of memory for items.
  */
 static int
-flow_tcf_get_items_and_size(const struct rte_flow_item items[],
+flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
+                           const struct rte_flow_item items[],
                            uint64_t *item_flags)
 {
        int size = 0;
@@ -726,6 +2390,8 @@ flow_tcf_get_items_and_size(const struct rte_flow_item items[],
        size += SZ_NLATTR_STRZ_OF("flower") +
                SZ_NLATTR_NEST + /* TCA_OPTIONS. */
                SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
+       if (attr->group > 0)
+               size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
                switch (items->type) {
                case RTE_FLOW_ITEM_TYPE_VOID:
@@ -756,7 +2422,7 @@ flow_tcf_get_items_and_size(const struct rte_flow_item items[],
                case RTE_FLOW_ITEM_TYPE_IPV6:
                        size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
                                SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
-                               SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
+                               SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
                                /* dst/src IP addr and mask. */
                        flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        break;
@@ -772,6 +2438,10 @@ flow_tcf_get_items_and_size(const struct rte_flow_item items[],
                                /* dst/src port and mask. */
                        flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       size += SZ_NLATTR_TYPE_OF(uint32_t);
+                       flags |= MLX5_FLOW_LAYER_VXLAN;
+                       break;
                default:
                        DRV_LOG(WARNING,
                                "unsupported item %p type %d,"
@@ -784,6 +2454,69 @@ flow_tcf_get_items_and_size(const struct rte_flow_item items[],
        return size;
 }
 
+/**
+ * Calculate size of memory to store the VXLAN encapsulation
+ * related items in the Netlink message buffer. The item list
+ * is specified by RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action.
+ * The item list should be validated beforehand.
+ *
+ * @param[in] action
+ *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object, its
+ *   definition is the list of pattern items to scan.
+ *
+ * @return
+ *   The size of the part of Netlink message buffer needed to store
+ *   the VXLAN encapsulation item attributes, 0 on unsupported item.
+ */
+static int
+flow_tcf_vxlan_encap_size(const struct rte_flow_action *action)
+{
+       const struct rte_flow_item *items;
+       int size = 0;
+
+       assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
+       assert(action->conf);
+
+       items = ((const struct rte_flow_action_vxlan_encap *)
+                                       action->conf)->definition;
+       assert(items);
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       /* This item does not require message buffer. */
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       size += SZ_NLATTR_DATA_OF(IPV4_ADDR_LEN) * 2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       size += SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP: {
+                       const struct rte_flow_item_udp *udp = items->mask;
+
+                       size += SZ_NLATTR_TYPE_OF(uint16_t);
+                       if (!udp || udp->hdr.src_port != RTE_BE16(0x0000))
+                               size += SZ_NLATTR_TYPE_OF(uint16_t);
+                       break;
+               }
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       size += SZ_NLATTR_TYPE_OF(uint32_t);
+                       break;
+               default:
+                       assert(false);
+                       DRV_LOG(WARNING,
+                               "unsupported item %p type %d,"
+                               " items must be validated"
+                               " before flow creation",
+                               (const void *)items, items->type);
+                       return 0;
+               }
+       }
+       return size;
+}
+
 /**
  * Calculate maximum size of memory for flow actions of Linux TC flower and
  * extract specified actions.
@@ -815,6 +2548,13 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
                                SZ_NLATTR_TYPE_OF(struct tc_mirred);
                        flags |= MLX5_FLOW_ACTION_PORT_ID;
                        break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("gact") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(struct tc_gact);
+                       flags |= MLX5_FLOW_ACTION_JUMP;
+                       break;
                case RTE_FLOW_ACTION_TYPE_DROP:
                        size += SZ_NLATTR_NEST + /* na_act_index. */
                                SZ_NLATTR_STRZ_OF("gact") +
@@ -822,6 +2562,8 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
                                SZ_NLATTR_TYPE_OF(struct tc_gact);
                        flags |= MLX5_FLOW_ACTION_DROP;
                        break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       break;
                case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
                        flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
                        goto action_of_vlan;
@@ -844,6 +2586,42 @@ action_of_vlan:
                                SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
                                SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
                        break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("tunnel_key") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(uint8_t);
+                       size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
+                       size += flow_tcf_vxlan_encap_size(actions) +
+                               RTE_ALIGN_CEIL /* preceding encap params. */
+                               (sizeof(struct flow_tcf_vxlan_encap),
+                               MNL_ALIGNTO);
+                       flags |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("tunnel_key") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(uint8_t);
+                       size += SZ_NLATTR_TYPE_OF(struct tc_tunnel_key);
+                       size += RTE_ALIGN_CEIL /* preceding decap params. */
+                               (sizeof(struct flow_tcf_vxlan_decap),
+                               MNL_ALIGNTO);
+                       flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       size += flow_tcf_get_pedit_actions_size(&actions,
+                                                               &flags);
+                       break;
                default:
                        DRV_LOG(WARNING,
                                "unsupported action %p type %d,"
@@ -900,20 +2678,23 @@ flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
  *   otherwise NULL and rte_ernno is set.
  */
 static struct mlx5_flow *
-flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
+flow_tcf_prepare(const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 uint64_t *item_flags, uint64_t *action_flags,
                 struct rte_flow_error *error)
 {
-       size_t size = sizeof(struct mlx5_flow) +
+       size_t size = RTE_ALIGN_CEIL
+                       (sizeof(struct mlx5_flow),
+                        alignof(struct flow_tcf_tunnel_hdr)) +
                      MNL_ALIGN(sizeof(struct nlmsghdr)) +
                      MNL_ALIGN(sizeof(struct tcmsg));
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
        struct tcmsg *tcm;
+       uint8_t *sp, *tun = NULL;
 
-       size += flow_tcf_get_items_and_size(items, item_flags);
+       size += flow_tcf_get_items_and_size(attr, items, item_flags);
        size += flow_tcf_get_actions_and_size(actions, action_flags);
        dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
        if (!dev_flow) {
@@ -922,14 +2703,52 @@ flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
                                   "not enough memory to create E-Switch flow");
                return NULL;
        }
-       nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+       sp = (uint8_t *)(dev_flow + 1);
+       if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) {
+               sp = RTE_PTR_ALIGN
+                       (sp, alignof(struct flow_tcf_tunnel_hdr));
+               tun = sp;
+               sp += RTE_ALIGN_CEIL
+                       (sizeof(struct flow_tcf_vxlan_encap),
+                       MNL_ALIGNTO);
+#ifndef NDEBUG
+               size -= RTE_ALIGN_CEIL
+                       (sizeof(struct flow_tcf_vxlan_encap),
+                       MNL_ALIGNTO);
+#endif
+       } else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
+               sp = RTE_PTR_ALIGN
+                       (sp, alignof(struct flow_tcf_tunnel_hdr));
+               tun = sp;
+               sp += RTE_ALIGN_CEIL
+                       (sizeof(struct flow_tcf_vxlan_decap),
+                       MNL_ALIGNTO);
+#ifndef NDEBUG
+               size -= RTE_ALIGN_CEIL
+                       (sizeof(struct flow_tcf_vxlan_decap),
+                       MNL_ALIGNTO);
+#endif
+       } else {
+               sp = RTE_PTR_ALIGN(sp, MNL_ALIGNTO);
+       }
+       nlh = mnl_nlmsg_put_header(sp);
        tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
        *dev_flow = (struct mlx5_flow){
                .tcf = (struct mlx5_flow_tcf){
+#ifndef NDEBUG
+                       .nlsize = size - RTE_ALIGN_CEIL
+                               (sizeof(struct mlx5_flow),
+                                alignof(struct flow_tcf_tunnel_hdr)),
+#endif
+                       .tunnel = (struct flow_tcf_tunnel_hdr *)tun,
                        .nlh = nlh,
                        .tcm = tcm,
                },
        };
+       if (*action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)
+               dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
+       else if (*action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
+               dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
        /*
         * Generate a reasonably unique handle based on the address of the
         * target buffer.
@@ -947,6 +2766,273 @@ flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
        return dev_flow;
 }
 
+/**
+ * Ensure the parent flow owns a counter context for the count action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure (not used).
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow, its parent rte_flow receives the counter.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+                                 struct mlx5_flow *dev_flow,
+                                 struct rte_flow_error *error)
+{
+       struct rte_flow *parent = dev_flow->flow;
+
+       /* An already attached counter context is reused as is. */
+       if (parent->counter)
+               return 0;
+       parent->counter = flow_tcf_counter_new();
+       if (parent->counter)
+               return 0;
+       return rte_flow_error_set(error, rte_errno,
+                                 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                 "cannot get counter"
+                                 " context.");
+}
+
+/**
+ * Convert VXLAN VNI to 32-bit integer, the leading byte is zero.
+ *
+ * @param[in] vni
+ *   VXLAN VNI in 24-bit wire format.
+ *
+ * @return
+ *   VXLAN VNI as a 32-bit integer value in network endian.
+ */
+static inline rte_be32_t
+vxlan_vni_as_be32(const uint8_t vni[3])
+{
+       const uint8_t bytes[4] = {
+               0, vni[0], vni[1], vni[2],
+       };
+       rte_be32_t dword;
+
+       memcpy(&dword, bytes, sizeof(dword));
+       return dword;
+}
+
+/**
+ * Gather MAC addresses from the RTE_FLOW_ITEM_TYPE_ETH entry found in the
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP configuration. An address is stored and
+ * flagged only if its mask is absent or equal to rte_flow_item_eth_mask.
+ * The item must be prevalidated, no validation is performed here.
+ *
+ * @param[in] spec
+ *   RTE_FLOW_ITEM_TYPE_ETH entry specification, must not be NULL.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_ETH entry mask, may be NULL.
+ * @param[out] encap
+ *   Structure to store the gathered MAC addresses and presence flags.
+ */
+static void
+flow_tcf_parse_vxlan_encap_eth(const struct rte_flow_item_eth *spec,
+                              const struct rte_flow_item_eth *mask,
+                              struct flow_tcf_vxlan_encap *encap)
+{
+       const struct rte_flow_item_eth *full = &rte_flow_item_eth_mask;
+       int dst_exact;
+       int src_exact;
+
+       /* Item must be validated before. No redundant checks. */
+       assert(spec);
+       /* Absent mask is handled the same way as the default one. */
+       dst_exact = !mask ||
+                   !memcmp(&mask->dst, &full->dst, sizeof(full->dst));
+       src_exact = !mask ||
+                   !memcmp(&mask->src, &full->src, sizeof(full->src));
+       if (dst_exact) {
+               /*
+                * tc tunnel_key does not accept Ethernet addresses,
+                * the kernel resolves destination from implicit
+                * sources (ARP table, etc), masks are not supported.
+                */
+               encap->eth.dst = spec->dst;
+               encap->mask |= FLOW_TCF_ENCAP_ETH_DST;
+       }
+       if (src_exact) {
+               /*
+                * Not a tunnel_key parameter either, ignored anyway.
+                */
+               encap->eth.src = spec->src;
+               encap->mask |= FLOW_TCF_ENCAP_ETH_SRC;
+       }
+}
+
+/**
+ * Gather addresses from the RTE_FLOW_ITEM_TYPE_IPV4 entry found in the
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP configuration. Both source and
+ * destination are stored and flagged unconditionally. The item must be
+ * prevalidated, no validation is performed here.
+ *
+ * @param[in] spec
+ *   RTE_FLOW_ITEM_TYPE_IPV4 entry specification, must not be NULL.
+ * @param[out] encap
+ *   Structure to store the gathered IPv4 addresses and presence flags.
+ */
+static void
+flow_tcf_parse_vxlan_encap_ipv4(const struct rte_flow_item_ipv4 *spec,
+                               struct flow_tcf_vxlan_encap *encap)
+{
+       /* The caller guarantees a validated item, hence only assert. */
+       assert(spec);
+       encap->mask |= FLOW_TCF_ENCAP_IPV4_DST;
+       encap->ipv4.dst = spec->hdr.dst_addr;
+       encap->mask |= FLOW_TCF_ENCAP_IPV4_SRC;
+       encap->ipv4.src = spec->hdr.src_addr;
+}
+
+/**
+ * Gather addresses from the RTE_FLOW_ITEM_TYPE_IPV6 entry found in the
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP configuration. Both source and
+ * destination are copied and flagged unconditionally. The item must be
+ * prevalidated, no validation is performed here.
+ *
+ * @param[in] spec
+ *   RTE_FLOW_ITEM_TYPE_IPV6 entry specification, must not be NULL.
+ * @param[out] encap
+ *   Structure to store the gathered IPv6 addresses and presence flags.
+ */
+static void
+flow_tcf_parse_vxlan_encap_ipv6(const struct rte_flow_item_ipv6 *spec,
+                               struct flow_tcf_vxlan_encap *encap)
+{
+       /* The caller guarantees a validated item, hence only assert. */
+       assert(spec);
+       encap->mask |= FLOW_TCF_ENCAP_IPV6_DST;
+       memcpy(encap->ipv6.dst, spec->hdr.dst_addr, IPV6_ADDR_LEN);
+       encap->mask |= FLOW_TCF_ENCAP_IPV6_SRC;
+       memcpy(encap->ipv6.src, spec->hdr.src_addr, IPV6_ADDR_LEN);
+}
+
+/**
+ * Gather ports from the RTE_FLOW_ITEM_TYPE_UDP entry found in the
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP configuration. The destination port
+ * is always stored, the source port only if the mask is absent or has a
+ * non-zero source port. The item must be prevalidated, no checks here.
+ *
+ * @param[in] spec
+ *   RTE_FLOW_ITEM_TYPE_UDP entry specification, must not be NULL.
+ * @param[in] mask
+ *   RTE_FLOW_ITEM_TYPE_UDP entry mask, may be NULL.
+ * @param[out] encap
+ *   Structure to store the gathered UDP ports and presence flags.
+ */
+static void
+flow_tcf_parse_vxlan_encap_udp(const struct rte_flow_item_udp *spec,
+                              const struct rte_flow_item_udp *mask,
+                              struct flow_tcf_vxlan_encap *encap)
+{
+       assert(spec);
+       encap->udp.dst = spec->hdr.dst_port;
+       encap->mask |= FLOW_TCF_ENCAP_UDP_DST;
+       if (!mask || mask->hdr.src_port != RTE_BE16(0x0000)) {
+               encap->udp.src = spec->hdr.src_port;
+               encap->mask |= FLOW_TCF_ENCAP_UDP_SRC;
+       }
+}
+
+/**
+ * Gather the VNI from the RTE_FLOW_ITEM_TYPE_VXLAN entry found in the
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP configuration. The VNI bytes are
+ * copied as is and flagged in the encapsulation parameters structure.
+ * The item must be prevalidated, no validation is performed here.
+ *
+ * @param[in] spec
+ *   RTE_FLOW_ITEM_TYPE_VXLAN entry specification, must not be NULL.
+ * @param[out] encap
+ *   Structure to store the gathered VNI and its presence flag.
+ */
+static void
+flow_tcf_parse_vxlan_encap_vni(const struct rte_flow_item_vxlan *spec,
+                              struct flow_tcf_vxlan_encap *encap)
+{
+       /* The caller guarantees a validated item, hence only assert. */
+       assert(spec);
+       encap->mask |= FLOW_TCF_ENCAP_VXLAN_VNI;
+       memcpy(encap->vxlan.vni, spec->vni, sizeof(encap->vxlan.vni));
+}
+
+/**
+ * Populate consolidated encapsulation object from list of pattern items.
+ *
+ * Helper function to process configuration of action such as
+ * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP. The item list should be
+ * validated, there is no way to return a meaningful error.
+ *
+ * @param[in] action
+ *   RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action object.
+ *   List of pattern items to gather data from.
+ * @param[out] encap
+ *   Structure to fill gathered data, mask is zeroed on unsupported item.
+ */
+static void
+flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
+                          struct flow_tcf_vxlan_encap *encap)
+{
+       union {
+               const struct rte_flow_item_eth *eth;
+               const struct rte_flow_item_ipv4 *ipv4;
+               const struct rte_flow_item_ipv6 *ipv6;
+               const struct rte_flow_item_udp *udp;
+               const struct rte_flow_item_vxlan *vxlan;
+       } spec, mask;
+       const struct rte_flow_item *items;
+
+       assert(action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP);
+       assert(action->conf);
+
+       items = ((const struct rte_flow_action_vxlan_encap *)
+                                       action->conf)->definition;
+       assert(items);
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       mask.eth = items->mask;
+                       spec.eth = items->spec;
+                       flow_tcf_parse_vxlan_encap_eth(spec.eth, mask.eth,
+                                                      encap);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       spec.ipv4 = items->spec;
+                       flow_tcf_parse_vxlan_encap_ipv4(spec.ipv4, encap);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       spec.ipv6 = items->spec;
+                       flow_tcf_parse_vxlan_encap_ipv6(spec.ipv6, encap);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       mask.udp = items->mask;
+                       spec.udp = items->spec;
+                       flow_tcf_parse_vxlan_encap_udp(spec.udp, mask.udp,
+                                                      encap);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       spec.vxlan = items->spec;
+                       flow_tcf_parse_vxlan_encap_vni(spec.vxlan, encap);
+                       break;
+               default:
+                       assert(false);
+                       DRV_LOG(WARNING,
+                               "unsupported item %p type %d,"
+                               " items must be validated"
+                               " before flow creation",
+                               (const void *)items, items->type);
+                       encap->mask = 0;
+                       return;
+               }
+       }
+}
+
 /**
  * Translate flow for Linux TC flower and construct Netlink message.
  *
@@ -981,15 +3067,29 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                const struct rte_flow_item_ipv6 *ipv6;
                const struct rte_flow_item_tcp *tcp;
                const struct rte_flow_item_udp *udp;
+               const struct rte_flow_item_vxlan *vxlan;
        } spec, mask;
        union {
                const struct rte_flow_action_port_id *port_id;
+               const struct rte_flow_action_jump *jump;
                const struct rte_flow_action_of_push_vlan *of_push_vlan;
                const struct rte_flow_action_of_set_vlan_vid *
                        of_set_vlan_vid;
                const struct rte_flow_action_of_set_vlan_pcp *
                        of_set_vlan_pcp;
        } conf;
+       union {
+               struct flow_tcf_tunnel_hdr *hdr;
+               struct flow_tcf_vxlan_decap *vxlan;
+       } decap = {
+               .hdr = NULL,
+       };
+       union {
+               struct flow_tcf_tunnel_hdr *hdr;
+               struct flow_tcf_vxlan_encap *vxlan;
+       } encap = {
+               .hdr = NULL,
+       };
        struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
        struct nlmsghdr *nlh = dev_flow->tcf.nlh;
        struct tcmsg *tcm = dev_flow->tcf.tcm;
@@ -1002,9 +3102,25 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
        struct nlattr *na_flower_act;
        struct nlattr *na_vlan_id = NULL;
        struct nlattr *na_vlan_priority = NULL;
+       uint64_t item_flags = 0;
+       int ret;
 
        claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
                                                PTOI_TABLE_SZ_MAX(dev)));
+       if (dev_flow->tcf.tunnel) {
+               switch (dev_flow->tcf.tunnel->type) {
+               case FLOW_TCF_TUNACT_VXLAN_DECAP:
+                       decap.vxlan = dev_flow->tcf.vxlan_decap;
+                       break;
+               case FLOW_TCF_TUNACT_VXLAN_ENCAP:
+                       encap.vxlan = dev_flow->tcf.vxlan_encap;
+                       break;
+               /* New tunnel actions can be added here. */
+               default:
+                       assert(false);
+                       break;
+               }
+       }
        nlh = dev_flow->tcf.nlh;
        tcm = dev_flow->tcf.tcm;
        /* Prepare API must have been called beforehand. */
@@ -1018,9 +3134,10 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
         */
        tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
                                  RTE_BE16(ETH_P_ALL));
+       if (attr->group > 0)
+               mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
        mnl_attr_put_strz(nlh, TCA_KIND, "flower");
        na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
-       mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
                unsigned int i;
 
@@ -1048,6 +3165,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        tcm->tcm_ifindex = ptoi[i].ifindex;
                        break;
                case RTE_FLOW_ITEM_TYPE_ETH:
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ?
+                                     MLX5_FLOW_LAYER_INNER_L2 :
+                                     MLX5_FLOW_LAYER_OUTER_L2;
                        mask.eth = flow_tcf_item_mask
                                (items, &rte_flow_item_eth_mask,
                                 &flow_tcf_mask_supported.eth,
@@ -1058,6 +3178,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        if (mask.eth == &flow_tcf_mask_empty.eth)
                                break;
                        spec.eth = items->spec;
+                       if (decap.vxlan &&
+                           !(item_flags & MLX5_FLOW_LAYER_VXLAN)) {
+                               DRV_LOG(WARNING,
+                                       "outer L2 addresses cannot be forced"
+                                       " for vxlan decapsulation, parameter"
+                                       " ignored");
+                               break;
+                       }
                        if (mask.eth->type) {
                                mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
                                                 spec.eth->type);
@@ -1079,8 +3207,12 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                             ETHER_ADDR_LEN,
                                             mask.eth->src.addr_bytes);
                        }
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
+                       assert(!encap.hdr);
+                       assert(!decap.hdr);
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
                        mask.vlan = flow_tcf_item_mask
                                (items, &rte_flow_item_vlan_mask,
                                 &flow_tcf_mask_supported.vlan,
@@ -1111,8 +3243,10 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                                 rte_be_to_cpu_16
                                                 (spec.vlan->tci &
                                                  RTE_BE16(0x0fff)));
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                        mask.ipv4 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv4_mask,
                                 &flow_tcf_mask_supported.ipv4,
@@ -1120,38 +3254,56 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.ipv4),
                                 error);
                        assert(mask.ipv4);
-                       if (!eth_type_set || !vlan_eth_type_set)
-                               mnl_attr_put_u16(nlh,
+                       spec.ipv4 = items->spec;
+                       if (!decap.vxlan) {
+                               if (!eth_type_set && !vlan_eth_type_set)
+                                       mnl_attr_put_u16
+                                               (nlh,
                                                 vlan_present ?
                                                 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
                                                 TCA_FLOWER_KEY_ETH_TYPE,
                                                 RTE_BE16(ETH_P_IP));
-                       eth_type_set = 1;
-                       vlan_eth_type_set = 1;
-                       if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
-                               break;
-                       spec.ipv4 = items->spec;
-                       if (mask.ipv4->hdr.next_proto_id) {
-                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
-                                               spec.ipv4->hdr.next_proto_id);
-                               ip_proto_set = 1;
+                               eth_type_set = 1;
+                               vlan_eth_type_set = 1;
+                               if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+                                       break;
+                               if (mask.ipv4->hdr.next_proto_id) {
+                                       mnl_attr_put_u8
+                                               (nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                                spec.ipv4->hdr.next_proto_id);
+                                       ip_proto_set = 1;
+                               }
+                       } else {
+                               assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4);
                        }
                        if (mask.ipv4->hdr.src_addr) {
-                               mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
-                                                spec.ipv4->hdr.src_addr);
-                               mnl_attr_put_u32(nlh,
-                                                TCA_FLOWER_KEY_IPV4_SRC_MASK,
-                                                mask.ipv4->hdr.src_addr);
+                               mnl_attr_put_u32
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_IPV4_SRC :
+                                        TCA_FLOWER_KEY_IPV4_SRC,
+                                        spec.ipv4->hdr.src_addr);
+                               mnl_attr_put_u32
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
+                                        TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                        mask.ipv4->hdr.src_addr);
                        }
                        if (mask.ipv4->hdr.dst_addr) {
-                               mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
-                                                spec.ipv4->hdr.dst_addr);
-                               mnl_attr_put_u32(nlh,
-                                                TCA_FLOWER_KEY_IPV4_DST_MASK,
-                                                mask.ipv4->hdr.dst_addr);
+                               mnl_attr_put_u32
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_IPV4_DST :
+                                        TCA_FLOWER_KEY_IPV4_DST,
+                                        spec.ipv4->hdr.dst_addr);
+                               mnl_attr_put_u32
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
+                                        TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                        mask.ipv4->hdr.dst_addr);
                        }
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        mask.ipv6 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv6_mask,
                                 &flow_tcf_mask_supported.ipv6,
@@ -1159,40 +3311,57 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.ipv6),
                                 error);
                        assert(mask.ipv6);
-                       if (!eth_type_set || !vlan_eth_type_set)
-                               mnl_attr_put_u16(nlh,
+                       spec.ipv6 = items->spec;
+                       if (!decap.vxlan) {
+                               if (!eth_type_set || !vlan_eth_type_set) {
+                                       mnl_attr_put_u16
+                                               (nlh,
                                                 vlan_present ?
                                                 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
                                                 TCA_FLOWER_KEY_ETH_TYPE,
                                                 RTE_BE16(ETH_P_IPV6));
-                       eth_type_set = 1;
-                       vlan_eth_type_set = 1;
-                       if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
-                               break;
-                       spec.ipv6 = items->spec;
-                       if (mask.ipv6->hdr.proto) {
-                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
-                                               spec.ipv6->hdr.proto);
-                               ip_proto_set = 1;
+                               }
+                               eth_type_set = 1;
+                               vlan_eth_type_set = 1;
+                               if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+                                       break;
+                               if (mask.ipv6->hdr.proto) {
+                                       mnl_attr_put_u8
+                                               (nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                                spec.ipv6->hdr.proto);
+                                       ip_proto_set = 1;
+                               }
+                       } else {
+                               assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6);
                        }
                        if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
-                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
-                                            sizeof(spec.ipv6->hdr.src_addr),
+                               mnl_attr_put(nlh, decap.vxlan ?
+                                            TCA_FLOWER_KEY_ENC_IPV6_SRC :
+                                            TCA_FLOWER_KEY_IPV6_SRC,
+                                            IPV6_ADDR_LEN,
                                             spec.ipv6->hdr.src_addr);
-                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
-                                            sizeof(mask.ipv6->hdr.src_addr),
+                               mnl_attr_put(nlh, decap.vxlan ?
+                                            TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
+                                            TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                            IPV6_ADDR_LEN,
                                             mask.ipv6->hdr.src_addr);
                        }
                        if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
-                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
-                                            sizeof(spec.ipv6->hdr.dst_addr),
+                               mnl_attr_put(nlh, decap.vxlan ?
+                                            TCA_FLOWER_KEY_ENC_IPV6_DST :
+                                            TCA_FLOWER_KEY_IPV6_DST,
+                                            IPV6_ADDR_LEN,
                                             spec.ipv6->hdr.dst_addr);
-                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
-                                            sizeof(mask.ipv6->hdr.dst_addr),
+                               mnl_attr_put(nlh, decap.vxlan ?
+                                            TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
+                                            TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                            IPV6_ADDR_LEN,
                                             mask.ipv6->hdr.dst_addr);
                        }
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_UDP:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
                        mask.udp = flow_tcf_item_mask
                                (items, &rte_flow_item_udp_mask,
                                 &flow_tcf_mask_supported.udp,
@@ -1200,28 +3369,48 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.udp),
                                 error);
                        assert(mask.udp);
-                       if (!ip_proto_set)
-                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
-                                               IPPROTO_UDP);
-                       if (mask.udp == &flow_tcf_mask_empty.udp)
-                               break;
                        spec.udp = items->spec;
+                       if (!decap.vxlan) {
+                               if (!ip_proto_set)
+                                       mnl_attr_put_u8
+                                               (nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               IPPROTO_UDP);
+                               if (mask.udp == &flow_tcf_mask_empty.udp)
+                                       break;
+                       } else {
+                               assert(mask.udp != &flow_tcf_mask_empty.udp);
+                               decap.vxlan->udp_port =
+                                       rte_be_to_cpu_16
+                                               (spec.udp->hdr.dst_port);
+                       }
                        if (mask.udp->hdr.src_port) {
-                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
-                                                spec.udp->hdr.src_port);
-                               mnl_attr_put_u16(nlh,
-                                                TCA_FLOWER_KEY_UDP_SRC_MASK,
-                                                mask.udp->hdr.src_port);
+                               mnl_attr_put_u16
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
+                                        TCA_FLOWER_KEY_UDP_SRC,
+                                        spec.udp->hdr.src_port);
+                               mnl_attr_put_u16
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
+                                        TCA_FLOWER_KEY_UDP_SRC_MASK,
+                                        mask.udp->hdr.src_port);
                        }
                        if (mask.udp->hdr.dst_port) {
-                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
-                                                spec.udp->hdr.dst_port);
-                               mnl_attr_put_u16(nlh,
-                                                TCA_FLOWER_KEY_UDP_DST_MASK,
-                                                mask.udp->hdr.dst_port);
+                               mnl_attr_put_u16
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
+                                        TCA_FLOWER_KEY_UDP_DST,
+                                        spec.udp->hdr.dst_port);
+                               mnl_attr_put_u16
+                                       (nlh, decap.vxlan ?
+                                        TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
+                                        TCA_FLOWER_KEY_UDP_DST_MASK,
+                                        mask.udp->hdr.dst_port);
                        }
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_TCP:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        mask.tcp = flow_tcf_item_mask
                                (items, &rte_flow_item_tcp_mask,
                                 &flow_tcf_mask_supported.tcp,
@@ -1249,6 +3438,28 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                                 TCA_FLOWER_KEY_TCP_DST_MASK,
                                                 mask.tcp->hdr.dst_port);
                        }
+                       if (mask.tcp->hdr.tcp_flags) {
+                               mnl_attr_put_u16
+                                       (nlh,
+                                        TCA_FLOWER_KEY_TCP_FLAGS,
+                                        rte_cpu_to_be_16
+                                               (spec.tcp->hdr.tcp_flags));
+                               mnl_attr_put_u16
+                                       (nlh,
+                                        TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+                                        rte_cpu_to_be_16
+                                               (mask.tcp->hdr.tcp_flags));
+                       }
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       assert(decap.vxlan);
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       spec.vxlan = items->spec;
+                       mnl_attr_put_u32(nlh,
+                                        TCA_FLOWER_KEY_ENC_KEY_ID,
+                                        vxlan_vni_as_be32(spec.vxlan->vni));
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
@@ -1282,6 +3493,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
                        na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
                        assert(na_act);
+                       if (encap.hdr) {
+                               assert(dev_flow->tcf.tunnel);
+                               dev_flow->tcf.tunnel->ifindex_ptr =
+                                       &((struct tc_mirred *)
+                                       mnl_attr_get_payload
+                                       (mnl_nlmsg_get_payload_tail
+                                               (nlh)))->ifindex;
+                       }
                        mnl_attr_put(nlh, TCA_MIRRED_PARMS,
                                     sizeof(struct tc_mirred),
                                     &(struct tc_mirred){
@@ -1292,6 +3511,23 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        mnl_attr_nest_end(nlh, na_act);
                        mnl_attr_nest_end(nlh, na_act_index);
                        break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       conf.jump = actions->conf;
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_GACT_PARMS,
+                                    sizeof(struct tc_gact),
+                                    &(struct tc_gact){
+                                       .action = TC_ACT_GOTO_CHAIN |
+                                                 conf.jump->group,
+                                    });
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
                case RTE_FLOW_ACTION_TYPE_DROP:
                        na_act_index =
                                mnl_attr_nest_start(nlh, na_act_index_cur++);
@@ -1307,6 +3543,16 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        mnl_attr_nest_end(nlh, na_act);
                        mnl_attr_nest_end(nlh, na_act_index);
                        break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       /*
+                        * Driver adds the count action implicitly for
+                        * each rule it creates.
+                        */
+                       ret = flow_tcf_translate_action_count(dev,
+                                                             dev_flow, error);
+                       if (ret < 0)
+                               return ret;
+                       break;
                case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
                        conf.of_push_vlan = NULL;
                        vlan_act = TCA_VLAN_ACT_POP;
@@ -1372,6 +3618,90 @@ override_na_vlan_priority:
                                        conf.of_set_vlan_pcp->vlan_pcp;
                        }
                        break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+                       assert(decap.vxlan);
+                       assert(dev_flow->tcf.tunnel);
+                       dev_flow->tcf.tunnel->ifindex_ptr =
+                               (unsigned int *)&tcm->tcm_ifindex;
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
+                               sizeof(struct tc_tunnel_key),
+                               &(struct tc_tunnel_key){
+                                       .action = TC_ACT_PIPE,
+                                       .t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
+                                       });
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+                       assert(encap.vxlan);
+                       flow_tcf_vxlan_encap_parse(actions, encap.vxlan);
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "tunnel_key");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_TUNNEL_KEY_PARMS,
+                               sizeof(struct tc_tunnel_key),
+                               &(struct tc_tunnel_key){
+                                       .action = TC_ACT_PIPE,
+                                       .t_action = TCA_TUNNEL_KEY_ACT_SET,
+                                       });
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_UDP_DST)
+                               mnl_attr_put_u16(nlh,
+                                        TCA_TUNNEL_KEY_ENC_DST_PORT,
+                                        encap.vxlan->udp.dst);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_SRC)
+                               mnl_attr_put_u32(nlh,
+                                        TCA_TUNNEL_KEY_ENC_IPV4_SRC,
+                                        encap.vxlan->ipv4.src);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV4_DST)
+                               mnl_attr_put_u32(nlh,
+                                        TCA_TUNNEL_KEY_ENC_IPV4_DST,
+                                        encap.vxlan->ipv4.dst);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_SRC)
+                               mnl_attr_put(nlh,
+                                        TCA_TUNNEL_KEY_ENC_IPV6_SRC,
+                                        sizeof(encap.vxlan->ipv6.src),
+                                        &encap.vxlan->ipv6.src);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_IPV6_DST)
+                               mnl_attr_put(nlh,
+                                        TCA_TUNNEL_KEY_ENC_IPV6_DST,
+                                        sizeof(encap.vxlan->ipv6.dst),
+                                        &encap.vxlan->ipv6.dst);
+                       if (encap.vxlan->mask & FLOW_TCF_ENCAP_VXLAN_VNI)
+                               mnl_attr_put_u32(nlh,
+                                        TCA_TUNNEL_KEY_ENC_KEY_ID,
+                                        vxlan_vni_as_be32
+                                               (encap.vxlan->vxlan.vni));
+                       mnl_attr_put_u8(nlh, TCA_TUNNEL_KEY_NO_CSUM, 0);
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       flow_tcf_create_pedit_mnl_msg(nlh,
+                                                     &actions, item_flags);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -1382,15 +3712,21 @@ override_na_vlan_priority:
        assert(na_flower);
        assert(na_flower_act);
        mnl_attr_nest_end(nlh, na_flower_act);
+       mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ?
+                                               0 : TCA_CLS_FLAGS_SKIP_SW);
        mnl_attr_nest_end(nlh, na_flower);
+       if (dev_flow->tcf.tunnel && dev_flow->tcf.tunnel->ifindex_ptr)
+               dev_flow->tcf.tunnel->ifindex_org =
+                       *dev_flow->tcf.tunnel->ifindex_ptr;
+       assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
        return 0;
 }
 
 /**
  * Send Netlink message with acknowledgment.
  *
- * @param nl
- *   Libmnl socket to use.
+ * @param ctx
+ *   Flow context to use.
  * @param nlh
  *   Message to send. This function always raises the NLM_F_ACK flag before
  *   sending.
@@ -1399,12 +3735,13 @@ override_na_vlan_priority:
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
 {
        alignas(struct nlmsghdr)
        uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
                    nlh->nlmsg_len - sizeof(*nlh)];
-       uint32_t seq = random();
+       uint32_t seq = ctx->seq++;
+       struct mnl_socket *nl = ctx->nl;
        int ret;
 
        nlh->nlmsg_flags |= NLM_F_ACK;
@@ -1439,7 +3776,7 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
               struct rte_flow_error *error)
 {
        struct priv *priv = dev->data->dev_private;
-       struct mnl_socket *nl = priv->mnl_socket;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
 
@@ -1449,7 +3786,7 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
        nlh = dev_flow->tcf.nlh;
        nlh->nlmsg_type = RTM_NEWTFILTER;
        nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-       if (!flow_tcf_nl_ack(nl, nlh))
+       if (!flow_tcf_nl_ack(ctx, nlh))
                return 0;
        return rte_flow_error_set(error, rte_errno,
                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1468,12 +3805,18 @@ static void
 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
        struct priv *priv = dev->data->dev_private;
-       struct mnl_socket *nl = priv->mnl_socket;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
 
        if (!flow)
                return;
+       if (flow->counter) {
+               if (--flow->counter->ref_cnt == 0) {
+                       rte_free(flow->counter);
+                       flow->counter = NULL;
+               }
+       }
        dev_flow = LIST_FIRST(&flow->dev_flows);
        if (!dev_flow)
                return;
@@ -1482,7 +3825,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
        nlh = dev_flow->tcf.nlh;
        nlh->nlmsg_type = RTM_DELTFILTER;
        nlh->nlmsg_flags = NLM_F_REQUEST;
-       flow_tcf_nl_ack(nl, nlh);
+       flow_tcf_nl_ack(ctx, nlh);
 }
 
 /**
@@ -1510,6 +3853,432 @@ flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
        rte_free(dev_flow);
 }
 
+/**
+ * Helper routine for figuring the space size required for a parse buffer.
+ *
+ * @param array
+ *   array of values to use.
+ * @param idx
+ *   Current location in array.
+ * @param value
+ *   Value to compare with.
+ *
+ * @return
+ *   The maximum between the given value and the array value on index.
+ */
+static uint16_t
+flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
+{
+       return idx < 0 ? (value) : RTE_MAX((array)[idx], value);
+}
+
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param[out] tb
+ *   Attribute table to be filled.
+ * @param max
+ *   Maximum entry in the attribute table.
+ * @param rta
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+                        struct rtattr *rta, int len)
+{
+       unsigned short type;
+       memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+       while (RTA_OK(rta, len)) {
+               type = rta->rta_type;
+               if (type <= max && !tb[type])
+                       tb[type] = rta;
+               rta = RTA_NEXT(rta, len);
+       }
+}
+
+/**
+ * Extract flow counters from flower action.
+ *
+ * @param rta
+ *   flower action stats properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data holding the count statistics of the rte_flow retrieved from
+ *   the message.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+                                      uint16_t rta_type[], int idx,
+                                      struct gnet_stats_basic *data)
+{
+       int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
+                                                TCA_STATS_BASIC);
+       struct rtattr *tbs[tca_stats_max + 1];
+
+       if (rta == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
+                                RTA_DATA(rta), RTA_PAYLOAD(rta));
+       switch (rta_type[idx]) {
+       case TCA_STATS_BASIC:
+               if (tbs[TCA_STATS_BASIC]) {
+                       memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+                              RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+                              sizeof(*data)));
+                       return 0;
+               }
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse flower single action retrieving the requested action attribute,
+ * if found.
+ *
+ * @param arg
+ *   flower action properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+                                    uint16_t rta_type[], int idx, void *data)
+{
+       int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
+       struct rtattr *tb[tca_act_max + 1];
+
+       if (arg == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_act_max,
+                                RTA_DATA(arg), RTA_PAYLOAD(arg));
+       if (tb[TCA_ACT_KIND] == NULL)
+               return -1;
+       switch (rta_type[idx]) {
+       case TCA_ACT_STATS:
+               if (tb[TCA_ACT_STATS])
+                       return flow_tcf_nl_action_stats_parse_and_get
+                                       (tb[TCA_ACT_STATS],
+                                        rta_type, --idx,
+                                        (struct gnet_stats_basic *)data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse flower action section in the message retrieving the requested
+ * attribute from the first action that provides it.
+ *
+ * @param arg
+ *   flower action section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
+                                uint16_t rta_type[], int idx, void *data)
+{
+       struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+       int i;
+
+       if (arg == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+                                RTA_DATA(arg), RTA_PAYLOAD(arg));
+       switch (rta_type[idx]) {
+       /*
+        * flow counters are stored in the actions defined by the flow
+        * and not in the flow itself, therefore we need to traverse the
+        * flower chain of actions in search for them.
+        *
+        * Note that the index is not decremented here.
+        */
+       case TCA_ACT_STATS:
+               for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+                       if (tb[i] &&
+                       !flow_tcf_nl_parse_one_action_and_get(tb[i],
+                                                             rta_type,
+                                                             idx, data))
+                               return 0;
+               }
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse flower classifier options in the message, retrieving the requested
+ * attribute if found.
+ *
+ * @param opt
+ *   flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+                              uint16_t rta_type[], int idx, void *data)
+{
+       int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
+                                                 TCA_FLOWER_ACT);
+       struct rtattr *tb[tca_flower_max + 1];
+
+       if (!opt || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
+                                RTA_DATA(opt), RTA_PAYLOAD(opt));
+       switch (rta_type[idx]) {
+       case TCA_FLOWER_ACT:
+               if (tb[TCA_FLOWER_ACT])
+                       return flow_tcf_nl_action_parse_and_get
+                                                       (tb[TCA_FLOWER_ACT],
+                                                        rta_type, --idx, data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param cnlh
+ *   Message received from Netlink.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
+                                uint16_t rta_type[], int idx, void *data)
+{
+       struct nlmsghdr *nlh = cnlh;
+       struct tcmsg *t = NLMSG_DATA(nlh);
+       int len = nlh->nlmsg_len;
+       int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
+       struct rtattr *tb[tca_max + 1];
+
+       if (idx < 0)
+               return -1;
+       if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+           nlh->nlmsg_type != RTM_GETTFILTER &&
+           nlh->nlmsg_type != RTM_DELTFILTER)
+               return -1;
+       len -= NLMSG_LENGTH(sizeof(*t));
+       if (len < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
+       /* Not a TC flower flow - bail out */
+       if (!tb[TCA_KIND] ||
+           strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+               return -1;
+       switch (rta_type[idx]) {
+       case TCA_OPTIONS:
+               if (tb[TCA_OPTIONS])
+                       return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+                                                             rta_type,
+                                                             --idx, data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to data area to be filled by the parsing routine.
+ *   Assumed to be a pointer to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+       /*
+        * The backward sequence of rta_types to pass in order to get
+        *  to the counters.
+        */
+       uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+                               TCA_FLOWER_ACT, TCA_OPTIONS };
+       struct flow_tcf_stats_basic *sb_data = data;
+       union {
+               const struct nlmsghdr *c;
+               struct nlmsghdr *nc;
+       } tnlh = { .c = nlh };
+
+       if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
+                                             RTE_DIM(rta_type) - 1,
+                                             (void *)&sb_data->counters))
+               sb_data->valid = true;
+       return MNL_CB_OK;
+}
+
+/**
+ * Read the hit/byte counters of a TC flower rule through Netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the flow whose counter is queried.
+ * @param[in, out] data
+ *   Pointer to struct rte_flow_query_count: reset is read, counters are set.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+                         struct rte_flow *flow,
+                         void *data,
+                         struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+       struct mnl_socket *nl = ctx->nl;
+       struct rte_flow_query_count *qc = data;
+       struct flow_tcf_stats_basic stats = { 0 };
+       /* The sequence number is consumed even when the query is refused. */
+       uint32_t seq = ctx->seq++;
+       struct mlx5_flow *dev_flow;
+       struct nlmsghdr *req;
+       ssize_t nbytes;
+
+       assert(qc);
+       dev_flow = LIST_FIRST(&flow->dev_flows);
+       /* E-Switch flow can't be expanded. */
+       assert(!LIST_NEXT(dev_flow, next));
+       if (!dev_flow->flow->counter)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                        NULL, "counters are not available.");
+       /* Reuse the rule's own Netlink message as the GET request. */
+       req = dev_flow->tcf.nlh;
+       req->nlmsg_type = RTM_GETTFILTER;
+       req->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+       req->nlmsg_seq = seq;
+       if (mnl_socket_sendto(nl, req, req->nlmsg_len) == -1)
+               return rte_flow_error_set
+                       (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                        NULL, "netlink: failed to read flow rule counters");
+       /* Drain the reply; the callback fills stats when counters show up. */
+       for (;;) {
+               nbytes = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+               if (nbytes <= 0)
+                       break;
+               nbytes = mnl_cb_run(ctx->buf, nbytes, seq,
+                                   mnl_socket_get_portid(nl),
+                                   flow_tcf_nl_message_get_stats_basic,
+                                   &stats);
+               if (nbytes <= 0)
+                       break;
+       }
+       /* valid is raised by the callback once counters were extracted. */
+       if (!stats.valid)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "flow does not have counter");
+       /* Return the delta from last reset. */
+       qc->hits_set = 1;
+       qc->bytes_set = 1;
+       qc->hits = stats.counters.packets - flow->counter->hits;
+       qc->bytes = stats.counters.bytes - flow->counter->bytes;
+       if (qc->reset) {
+               flow->counter->hits = stats.counters.packets;
+               flow->counter->bytes = stats.counters.bytes;
+       }
+       return 0;
+}
+
+/**
+ * Query a flow: COUNT is the only action served, VOID entries are skipped.
+ * A list holding no COUNT action at all is rejected with EINVAL.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev,
+              struct rte_flow *flow,
+              const struct rte_flow_action *actions,
+              void *data,
+              struct rte_flow_error *error)
+{
+       bool queried = false;
+       int ret = -EINVAL;
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               if (actions->type == RTE_FLOW_ACTION_TYPE_VOID)
+                       continue;
+               if (actions->type != RTE_FLOW_ACTION_TYPE_COUNT)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       actions, "action not supported");
+               ret = flow_tcf_query_count(dev, flow, data, error);
+               queried = true;
+       }
+       return queried ? ret :
+              rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+                                 NULL, "no action to query");
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
        .validate = flow_tcf_validate,
        .prepare = flow_tcf_prepare,
@@ -1517,13 +4286,51 @@ const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
        .apply = flow_tcf_apply,
        .remove = flow_tcf_remove,
        .destroy = flow_tcf_destroy,
+       .query = flow_tcf_query,
 };
 
 /**
- * Initialize ingress qdisc of a given network interface.
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+       struct mnl_socket *sock = mnl_socket_open(NETLINK_ROUTE);
+
+       if (!sock) {
+               rte_errno = errno;
+               return NULL;
+       }
+       mnl_socket_setsockopt(sock, NETLINK_CAP_ACK, &(int){ 1 }, sizeof(int));
+       if (mnl_socket_bind(sock, 0, MNL_SOCKET_AUTOPID) == 0)
+               return sock;
+       rte_errno = errno; /* Captured before the socket is closed. */
+       mnl_socket_close(sock);
+       return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
  *
  * @param nl
  *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+       if (nl != NULL) /* A NULL socket is silently ignored. */
+               (void)mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ *   Pointer to tc-flower context to use.
  * @param ifindex
  *   Index of network interface to initialize.
  * @param[out] error
@@ -1533,8 +4340,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
-                  struct rte_flow_error *error)
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+                  unsigned int ifindex, struct rte_flow_error *error)
 {
        struct nlmsghdr *nlh;
        struct tcmsg *tcm;
@@ -1551,7 +4358,7 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
        tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
        tcm->tcm_parent = TC_H_INGRESS;
        /* Ignore errors when qdisc is already absent. */
-       if (flow_tcf_nl_ack(nl, nlh) &&
+       if (flow_tcf_nl_ack(ctx, nlh) &&
            rte_errno != EINVAL && rte_errno != ENOENT)
                return rte_flow_error_set(error, rte_errno,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1567,7 +4374,7 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
        tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
        tcm->tcm_parent = TC_H_INGRESS;
        mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-       if (flow_tcf_nl_ack(nl, nlh))
+       if (flow_tcf_nl_ack(ctx, nlh))
                return rte_flow_error_set(error, rte_errno,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                          "netlink: failed to create ingress"
@@ -1576,37 +4383,47 @@ mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
 }
 
 /**
- * Create and configure a libmnl socket for Netlink flow rules.
+ * Create libmnl context for Netlink flow rules.
  *
  * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
+ *   A valid tc-flower context pointer on success, NULL otherwise and
+ *   rte_errno is set (TODO confirm for the rte_zmalloc() failure paths).
  */
-struct mnl_socket *
-mlx5_flow_tcf_socket_create(void)
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
 {
-       struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-       if (nl) {
-               mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-                                     sizeof(int));
-               if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-                       return nl;
-       }
-       rte_errno = errno;
-       if (nl)
-               mnl_socket_close(nl);
+       struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+                                                       sizeof(*ctx),
+                                                       sizeof(uint32_t));
+       if (!ctx)
+               goto error;
+       ctx->nl = flow_tcf_mnl_socket_create();
+       if (!ctx->nl)
+               goto error;
+       ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+       ctx->buf = rte_zmalloc(__func__,
+                              ctx->buf_size, sizeof(uint32_t));
+       if (!ctx->buf)
+               goto error;
+       ctx->seq = random(); /* Initial Netlink sequence number. */
+       return ctx;
+error:
+       mlx5_flow_tcf_context_destroy(ctx); /* ctx may be NULL or partial. */
         return NULL;
 }
 
 /**
- * Destroy a libmnl socket.
+ * Destroy a libmnl context.
  *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param ctx
+ *   Pointer to the tc-flower context to destroy; NULL is accepted.
  */
 void
-mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
 {
-       mnl_socket_close(nl);
+       if (!ctx)
+               return;
+       flow_tcf_mnl_socket_destroy(ctx->nl);
+       rte_free(ctx->buf);
+       rte_free(ctx);
 }