X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_flow.c;h=d4d956f2ec01cd8b7b4e8a502805b0d078cd00ce;hb=2e542da709371ee51d61d74c9a1b357ad34ae13e;hp=9314e3a86ae5a022ebaa0aa785f1612fa2cb7064;hpb=dbeba4cf18a5e1d9f7aaa284457bf15c351eb965;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 9314e3a86a..d4d956f2ec 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -30,9 +29,10 @@ #include "mlx5.h" #include "mlx5_defs.h" -#include "mlx5_prm.h" -#include "mlx5_glue.h" #include "mlx5_flow.h" +#include "mlx5_glue.h" +#include "mlx5_prm.h" +#include "mlx5_rxtx.h" /* Dev ops structure defined in mlx5.c */ extern const struct eth_dev_ops mlx5_dev_ops; @@ -42,7 +42,6 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate; #ifdef HAVE_IBV_FLOW_DV_SUPPORT extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops; #endif -extern const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops; extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; @@ -52,7 +51,6 @@ const struct mlx5_flow_driver_ops *flow_drv_ops[] = { #ifdef HAVE_IBV_FLOW_DV_SUPPORT [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, #endif - [MLX5_FLOW_TYPE_TCF] = &mlx5_flow_tcf_drv_ops, [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops }; @@ -129,7 +127,9 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { .next = RTE_FLOW_EXPAND_RSS_NEXT (MLX5_EXPANSION_OUTER_IPV4_UDP, MLX5_EXPANSION_OUTER_IPV4_TCP, - MLX5_EXPANSION_GRE), + MLX5_EXPANSION_GRE, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_IPV4, .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | ETH_RSS_NONFRAG_IPV4_OTHER, @@ -147,7 +147,9 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { [MLX5_EXPANSION_OUTER_IPV6] = { .next = RTE_FLOW_EXPAND_RSS_NEXT (MLX5_EXPANSION_OUTER_IPV6_UDP, - MLX5_EXPANSION_OUTER_IPV6_TCP), + MLX5_EXPANSION_OUTER_IPV6_TCP, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_IPV6, .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER, @@ -300,6 +302,18 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = { .tunnel = MLX5_FLOW_LAYER_MPLS, .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, }, + { + .tunnel = MLX5_FLOW_LAYER_NVGRE, + .ptype = RTE_PTYPE_TUNNEL_NVGRE, + }, + { + .tunnel = MLX5_FLOW_LAYER_IPIP, + .ptype = RTE_PTYPE_TUNNEL_IP, + }, + { + .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, + .ptype = RTE_PTYPE_TUNNEL_IP, + }, }; /** @@ -315,6 +329,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = { int mlx5_flow_discover_priorities(struct rte_eth_dev *dev) { + struct mlx5_priv *priv = dev->data->dev_private; struct { struct ibv_flow_attr attr; struct ibv_flow_spec_eth eth; @@ -322,6 +337,7 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev) } flow_attr = { .attr = { .num_of_specs = 2, + .port = (uint8_t)priv->ibv_port, }, .eth = { .type = IBV_FLOW_SPEC_ETH, @@ -350,6 +366,7 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev) claim_zero(mlx5_glue->destroy_flow(flow)); priority = vprio[i]; } + mlx5_hrxq_drop_release(dev); switch (priority) { case 8: priority = RTE_DIM(priority_map_3); @@ -361,10 +378,9 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev) rte_errno = ENOTSUP; DRV_LOG(ERR, "port %u verbs maximum priority: %d expected 8/16", - dev->data->port_id, 
vprio[i]); + dev->data->port_id, priority); return -rte_errno; } - mlx5_hrxq_drop_release(dev); DRV_LOG(INFO, "port %u flow maximum priority: %d", dev->data->port_id, priority); return priority; @@ -473,7 +489,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item, * Item hash fields. * * @return - * The hash fileds that should be used. + * The hash fields that should be used. */ uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, @@ -680,6 +696,29 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev) } } +/* + * return a pointer to the desired action in the list of actions. + * + * @param[in] actions + * The list of actions to search the action in. + * @param[in] action + * The action to find. + * + * @return + * Pointer to the action in the list, if found. NULL otherwise. + */ +const struct rte_flow_action * +mlx5_flow_find_action(const struct rte_flow_action *actions, + enum rte_flow_action_type action) +{ + if (actions == NULL) + return NULL; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) + if (actions->type == action) + return actions; + return NULL; +} + /* * Validate the flag action. * @@ -786,7 +825,7 @@ mlx5_flow_validate_action_mark(const struct rte_flow_action *action, * Pointer to error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_flow_validate_action_drop(uint64_t action_flags, @@ -801,7 +840,8 @@ mlx5_flow_validate_action_drop(uint64_t action_flags, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "can't drop and mark in same flow"); - if (action_flags & MLX5_FLOW_FATE_ACTIONS) + if (action_flags & (MLX5_FLOW_FATE_ACTIONS | + MLX5_FLOW_FATE_ESWITCH_ACTIONS)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "can't have 2 fate actions in" @@ -829,7 +869,7 @@ mlx5_flow_validate_action_drop(uint64_t action_flags, * Pointer to error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_flow_validate_action_queue(const struct rte_flow_action *action, @@ -879,21 +919,25 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action, * Pointer to the Ethernet device structure. * @param[in] attr * Attributes of flow that includes this action. + * @param[in] item_flags + * Items that were detected. * @param[out] error * Pointer to error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int mlx5_flow_validate_action_rss(const struct rte_flow_action *action, uint64_t action_flags, struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + uint64_t item_flags, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; const struct rte_flow_action_rss *rss = action->conf; + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); unsigned int i; if (action_flags & MLX5_FLOW_FATE_ACTIONS) @@ -962,6 +1006,11 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, "rss action not supported for " "egress"); + if (rss->level > 1 && !tunnel) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + "inner RSS is not supported for " + "non-tunnel flows"); return 0; } @@ -976,7 +1025,7 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action, * Pointer to error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, @@ -1026,7 +1075,7 @@ mlx5_flow_validate_attributes(struct rte_eth_dev *dev, return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, "egress is not supported"); - if (attributes->transfer) + if (attributes->transfer && !priv->config.dv_esw_en) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, NULL, "transfer is not supported"); @@ -1038,6 +1087,110 @@ mlx5_flow_validate_attributes(struct rte_eth_dev *dev, return 0; } +/** + * Validate ICMP6 item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that holds the items detected until now. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, + uint64_t item_flags, + uint8_t target_protocol, + struct rte_flow_error *error) +{ + const struct rte_flow_item_icmp6 *mask = item->mask; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; + int ret; + + if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "protocol filtering not compatible" + " with ICMP6 layer"); + if (!(item_flags & l3m)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "IPv6 is mandatory to filter on" + " ICMP6"); + if (item_flags & l4m) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple L4 layers not supported"); + if (!mask) + mask = &rte_flow_item_icmp6_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, + (const uint8_t *)&rte_flow_item_icmp6_mask, + sizeof(struct rte_flow_item_icmp6), error); + if (ret < 0) + return ret; + return 0; +} + +/** + * Validate ICMP item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that holds the items detected until now. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, + uint64_t item_flags, + uint8_t target_protocol, + struct rte_flow_error *error) +{ + const struct rte_flow_item_icmp *mask = item->mask; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; + const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : + MLX5_FLOW_LAYER_OUTER_L4; + int ret; + + if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "protocol filtering not compatible" + " with ICMP layer"); + if (!(item_flags & l3m)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "IPv4 is mandatory to filter" + " on ICMP"); + if (item_flags & l4m) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple L4 layers not supported"); + if (!mask) + mask = &rte_flow_item_icmp_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, + (const uint8_t *)&rte_flow_item_icmp_mask, + sizeof(struct rte_flow_item_icmp), error); + if (ret < 0) + return ret; + return 0; +} + /** * Validate Ethernet item. * @@ -1071,6 +1224,11 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, "multiple L2 layers not supported"); + if (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "inner L2 layer should not " + "follow inner L3 layers"); if (!mask) mask = &rte_flow_item_eth_mask; ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, @@ -1087,6 +1245,8 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, * Item specification. * @param[in] item_flags * Bit-fields that holds the items detected until now. + * @param[in] dev + * Ethernet device flow is being created on. * @param[out] error * Pointer to error structure. * @@ -1096,13 +1256,14 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, uint64_t item_flags, + struct rte_eth_dev *dev, struct rte_flow_error *error) { const struct rte_flow_item_vlan *spec = item->spec; const struct rte_flow_item_vlan *mask = item->mask; const struct rte_flow_item_vlan nic_mask = { - .tci = RTE_BE16(0x0fff), - .inner_type = RTE_BE16(0xffff), + .tci = RTE_BE16(UINT16_MAX), + .inner_type = RTE_BE16(UINT16_MAX), }; uint16_t vlan_tag = 0; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); @@ -1114,6 +1275,8 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : MLX5_FLOW_LAYER_OUTER_VLAN; + const uint64_t l2m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; if (item_flags & vlanm) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1122,6 +1285,10 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L2 layer cannot follow L3/L4 layer"); + else if ((item_flags & l2m) == 0) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "no L2 layer before VLAN"); if (!mask) mask = &rte_flow_item_vlan_mask; ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, @@ -1130,6 +1297,25 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, error); if (ret) return ret; + if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { + struct mlx5_priv *priv = dev->data->dev_private; + + if (priv->vmwa_context) { + /* + * Non-NULL context means we have a virtual machine + * and SR-IOV enabled, we have to create VLAN interface + * to make hypervisor to setup E-Switch vport + * context correctly. We avoid creating the multiple + * VLAN interfaces, so we cannot support VLAN tag mask. + */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "VLAN tag mask is not" + " supported in virtual" + " environment"); + } + } if (spec) { vlan_tag = spec->tci; vlan_tag &= mask->tci; @@ -1169,6 +1355,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, struct rte_flow_error *error) { const struct rte_flow_item_ipv4 *mask = item->mask; + const struct rte_flow_item_ipv4 *spec = item->spec; const struct rte_flow_item_ipv4 nic_mask = { .hdr = { .src_addr = RTE_BE32(0xffffffff), @@ -1183,7 +1370,24 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : MLX5_FLOW_LAYER_OUTER_L4; int ret; + uint8_t next_proto = 0xFF; + if (item_flags & MLX5_FLOW_LAYER_IPIP) { + if (mask && spec) + next_proto = mask->hdr.next_proto_id & + spec->hdr.next_proto_id; + if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "multiple tunnel " + "not supported"); + } + if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "wrong tunnel type - IPv6 specified " + "but IPv4 item provided"); if (item_flags & l3m) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1192,6 +1396,15 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 cannot follow an L4 layer."); + else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && + !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L3 cannot follow an NVGRE layer."); + else if (!tunnel && !(item_flags & MLX5_FLOW_LAYER_OUTER_L2)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "no L2 layer before IPV4"); if (!mask) mask = &rte_flow_item_ipv4_mask; else if (mask->hdr.next_proto_id != 0 && @@ -1233,6 +1446,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, struct rte_flow_error *error) { const struct rte_flow_item_ipv6 *mask = item->mask; + const struct rte_flow_item_ipv6 *spec = item->spec; const struct rte_flow_item_ipv6 nic_mask = { .hdr = { .src_addr = @@ -1252,7 +1466,23 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : MLX5_FLOW_LAYER_OUTER_L4; int ret; + uint8_t next_proto = 0xFF; + if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { + if (mask && spec) + next_proto = mask->hdr.proto & spec->hdr.proto; + if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "multiple tunnel " + "not supported"); + } + if (item_flags & MLX5_FLOW_LAYER_IPIP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "wrong tunnel type - IPv4 specified " + "but IPv6 item provided"); if (item_flags & l3m) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1261,6 +1491,15 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 cannot follow an L4 layer."); + else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && + !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L3 cannot follow an NVGRE layer."); + else if (!tunnel && !(item_flags & MLX5_FLOW_LAYER_OUTER_L2)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "no L2 layer before IPV6"); if (!mask) mask = &rte_flow_item_ipv6_mask; ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, @@ -1283,7 +1522,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, * @param[in] target_protocol * The next protocol in the previous item. * @param[in] flow_mask - * mlx5 flow-specific (TCF, DV, verbs, etc.) supported header fields mask. + * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. * @param[out] error * Pointer to error structure. * @@ -1553,6 +1792,61 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, " defined"); return 0; } +/** + * Validate GRE Key item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit flags to mark detected items. + * @param[in] gre_item + * Pointer to gre_item + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, + uint64_t item_flags, + const struct rte_flow_item *gre_item, + struct rte_flow_error *error) +{ + const rte_be32_t *mask = item->mask; + int ret = 0; + rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); + const struct rte_flow_item_gre *gre_spec = gre_item->spec; + const struct rte_flow_item_gre *gre_mask = gre_item->mask; + + if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Multiple GRE key not support"); + if (!(item_flags & MLX5_FLOW_LAYER_GRE)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "No preceding GRE header"); + if (item_flags & MLX5_FLOW_LAYER_INNER) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "GRE key following a wrong item"); + if (!gre_mask) + gre_mask = &rte_flow_item_gre_mask; + if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && + !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Key bit must be on"); + + if (!mask) + mask = &gre_key_default_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, + (const uint8_t *)&gre_key_default_mask, + sizeof(rte_be32_t), error); + return ret; +} /** * Validate GRE item. @@ -1578,6 +1872,10 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, const struct rte_flow_item_gre *spec __rte_unused = item->spec; const struct rte_flow_item_gre *mask = item->mask; int ret; + const struct rte_flow_item_gre nic_mask = { + .c_rsvd0_ver = RTE_BE16(0xB000), + .protocol = RTE_BE16(UINT16_MAX), + }; if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) return rte_flow_error_set(error, EINVAL, @@ -1597,10 +1895,11 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, mask = &rte_flow_item_gre_mask; ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, - (const uint8_t *)&rte_flow_item_gre_mask, + (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_gre), error); if (ret < 0) return ret; +#ifndef HAVE_MLX5DV_DR #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT if (spec && (spec->protocol & mask->protocol)) return rte_flow_error_set(error, ENOTSUP, @@ -1608,10 +1907,100 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, "without MPLS support the" " specification cannot be used for" " filtering"); +#endif #endif return 0; } +/** + * Validate Geneve item. + * + * @param[in] item + * Item specification. + * @param[in] itemFlags + * Bit-fields that holds the items detected until now. + * @param[in] enPriv + * Pointer to the private data structure. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ + +int +mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, + uint64_t item_flags, + struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_item_geneve *spec = item->spec; + const struct rte_flow_item_geneve *mask = item->mask; + int ret; + uint16_t gbhdr; + uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ? 
+ MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; + const struct rte_flow_item_geneve nic_mask = { + .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), + .vni = "\xff\xff\xff", + .protocol = RTE_BE16(UINT16_MAX), + }; + + if (!(priv->config.hca_attr.flex_parser_protocols & + MLX5_HCA_FLEX_GENEVE_ENABLED) || + !priv->config.hca_attr.tunnel_stateless_geneve_rx) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L3 Geneve is not enabled by device" + " parameter and/or not configured in" + " firmware"); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple tunnel layers not" + " supported"); + /* + * Verify only UDPv4 is present as defined in + * https://tools.ietf.org/html/rfc7348 + */ + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "no outer UDP layer found"); + if (!mask) + mask = &rte_flow_item_geneve_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_geneve), error); + if (ret) + return ret; + if (spec) { + gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); + if (MLX5_GENEVE_VER_VAL(gbhdr) || + MLX5_GENEVE_CRITO_VAL(gbhdr) || + MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Geneve protocol unsupported" + " fields are being used"); + if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Unsupported Geneve options length"); + } + if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Geneve tunnel must be fully defined"); + return 0; +} + /** * Validate MPLS item. * @@ -1678,24 +2067,75 @@ mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, " update."); } +/** + * Validate NVGRE item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit flags to mark detected items. + * @param[in] target_protocol + * The next protocol in the previous item. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, + uint64_t item_flags, + uint8_t target_protocol, + struct rte_flow_error *error) +{ + const struct rte_flow_item_nvgre *mask = item->mask; + int ret; + + if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "protocol filtering not compatible" + " with this GRE layer"); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple tunnel layers not" + " supported"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L3 Layer is missing"); + if (!mask) + mask = &rte_flow_item_nvgre_mask; + ret = mlx5_flow_item_acceptable + (item, (const uint8_t *)mask, + (const uint8_t *)&rte_flow_item_nvgre_mask, + sizeof(struct rte_flow_item_nvgre), error); + if (ret < 0) + return ret; + return 0; +} + static int flow_null_validate(struct rte_eth_dev *dev __rte_unused, const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[] __rte_unused, const struct rte_flow_action actions[] __rte_unused, - struct rte_flow_error *error __rte_unused) + bool external __rte_unused, + struct rte_flow_error *error) { - rte_errno = ENOTSUP; - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); } static struct mlx5_flow * flow_null_prepare(const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[] __rte_unused, const struct rte_flow_action actions[] __rte_unused, - struct rte_flow_error *error __rte_unused) + struct rte_flow_error *error) { - rte_errno = ENOTSUP; + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); return NULL; } @@ -1705,19 +2145,19 @@ flow_null_translate(struct rte_eth_dev *dev __rte_unused, const struct rte_flow_attr *attr __rte_unused, const struct rte_flow_item items[] __rte_unused, const struct rte_flow_action actions[] __rte_unused, - struct rte_flow_error *error __rte_unused) + struct rte_flow_error *error) { - rte_errno = ENOTSUP; - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); } static int flow_null_apply(struct rte_eth_dev *dev __rte_unused, struct rte_flow *flow __rte_unused, - struct rte_flow_error *error __rte_unused) + struct rte_flow_error *error) { - rte_errno = ENOTSUP; - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); } static void @@ -1737,10 +2177,10 @@ flow_null_query(struct rte_eth_dev *dev __rte_unused, struct rte_flow *flow __rte_unused, const struct rte_flow_action *actions __rte_unused, void *data __rte_unused, - struct rte_flow_error *error __rte_unused) + struct rte_flow_error *error) { - rte_errno = ENOTSUP; - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); } /* Void driver to protect from null pointer reference. */ @@ -1772,9 +2212,9 @@ flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) struct mlx5_priv *priv = dev->data->dev_private; enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX; - if (attr->transfer) - type = MLX5_FLOW_TYPE_TCF; - else + if (attr->transfer && priv->config.dv_esw_en) + type = MLX5_FLOW_TYPE_DV; + if (!attr->transfer) type = priv->config.dv_flow_en ? 
MLX5_FLOW_TYPE_DV : MLX5_FLOW_TYPE_VERBS; return type; @@ -1794,24 +2234,26 @@ flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) * Pointer to the list of items. * @param[in] actions * Pointer to the list of actions. + * @param[in] external + * This flow rule is created by request external to PMD. * @param[out] error * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static inline int flow_drv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - struct rte_flow_error *error) + bool external, struct rte_flow_error *error) { const struct mlx5_flow_driver_ops *fops; enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); fops = flow_get_drv_ops(type); - return fops->validate(dev, attr, items, actions, error); + return fops->validate(dev, attr, items, actions, external, error); } /** @@ -1821,7 +2263,7 @@ flow_drv_validate(struct rte_eth_dev *dev, * initializes the device flow and returns the pointer. * * @note - * This function initializes device flow structure such as dv, tcf or verbs in + * This function initializes device flow structure such as dv or verbs in * struct mlx5_flow. However, it is caller's responsibility to initialize the * rest. For example, adding returning device flow to flow->dev_flow list and * setting backward reference to the flow should be done out of this function. @@ -1837,7 +2279,7 @@ flow_drv_validate(struct rte_eth_dev *dev, * Pointer to the error structure. * * @return - * Pointer to device flow on success, otherwise NULL and rte_ernno is set. + * Pointer to device flow on success, otherwise NULL and rte_errno is set. */ static inline struct mlx5_flow * flow_drv_prepare(const struct rte_flow *flow, @@ -1881,7 +2323,7 @@ flow_drv_prepare(const struct rte_flow *flow, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static inline int flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, @@ -1983,7 +2425,7 @@ mlx5_flow_validate(struct rte_eth_dev *dev, { int ret; - ret = flow_drv_validate(dev, attr, items, actions, error); + ret = flow_drv_validate(dev, attr, items, actions, true, error); if (ret < 0) return ret; return 0; @@ -2045,6 +2487,8 @@ find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) * Pattern specification (list terminated by the END pattern item). * @param[in] actions * Associated actions (list terminated by the END action). + * @param[in] external + * This flow rule is created by request external to PMD. * @param[out] error * Perform verbose error reporting if not NULL. 
* @@ -2056,7 +2500,7 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - struct rte_flow_error *error) + bool external, struct rte_flow_error *error) { struct rte_flow *flow = NULL; struct mlx5_flow *dev_flow; @@ -2070,7 +2514,7 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, uint32_t i; uint32_t flow_size; - ret = flow_drv_validate(dev, attr, items, actions, error); + ret = flow_drv_validate(dev, attr, items, actions, external, error); if (ret < 0) return NULL; flow_size = sizeof(struct rte_flow); @@ -2081,7 +2525,13 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, else flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *)); flow = rte_calloc(__func__, 1, flow_size, 0); + if (!flow) { + rte_errno = ENOMEM; + return NULL; + } flow->drv_type = flow_get_drv_type(dev, attr); + flow->ingress = attr->ingress; + flow->transfer = attr->transfer; assert(flow->drv_type > MLX5_FLOW_TYPE_MIN && flow->drv_type < MLX5_FLOW_TYPE_MAX); flow->queue = (void *)(flow + 1); @@ -2106,6 +2556,7 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, if (!dev_flow) goto error; dev_flow->flow = flow; + dev_flow->external = external; LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next); ret = flow_drv_translate(dev, dev_flow, attr, buf->entry[i].pattern, @@ -2130,6 +2581,55 @@ error: return NULL; } +/** + * Create a dedicated flow rule on e-switch table 0 (root table), to direct all + * incoming packets to table 1. + * + * Other flow rules, requested for group n, will be created in + * e-switch table n+1. + * Jump action to e-switch group n will be created to group n+1. + * + * Used when working in switchdev mode, to utilise advantages of table 1 + * and above. + * + * @param dev + * Pointer to Ethernet device. + * + * @return + * Pointer to flow on success, NULL otherwise and rte_errno is set. + */ +struct rte_flow * +mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) +{ + const struct rte_flow_attr attr = { + .group = 0, + .priority = 0, + .ingress = 1, + .egress = 0, + .transfer = 1, + }; + const struct rte_flow_item pattern = { + .type = RTE_FLOW_ITEM_TYPE_END, + }; + struct rte_flow_action_jump jump = { + .group = 1, + }; + const struct rte_flow_action actions[] = { + { + .type = RTE_FLOW_ACTION_TYPE_JUMP, + .conf = &jump, + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + struct mlx5_priv *priv = dev->data->dev_private; + struct rte_flow_error error; + + return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern, + actions, false, &error); +} + /** * Create a flow. 
* @@ -2143,10 +2643,10 @@ mlx5_flow_create(struct rte_eth_dev *dev, const struct rte_flow_action actions[], struct rte_flow_error *error) { - struct mlx5_priv *priv = (struct mlx5_priv *)dev->data->dev_private; + struct mlx5_priv *priv = dev->data->dev_private; return flow_list_create(dev, &priv->flows, - attr, items, actions, error); + attr, items, actions, true, error); } /** @@ -2343,7 +2843,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, for (i = 0; i != priv->reta_idx_n; ++i) queue[i] = (*priv->reta_idx)[i]; flow = flow_list_create(dev, &priv->ctrl_flows, - &attr, items, actions, &error); + &attr, items, actions, false, &error); if (!flow) return -rte_errno; return 0; @@ -2535,13 +3035,13 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: - attributes->l3.ipv4.hdr = (struct ipv4_hdr){ + attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ .src_addr = input->flow.ip4_flow.src_ip, .dst_addr = input->flow.ip4_flow.dst_ip, .time_to_live = input->flow.ip4_flow.ttl, .type_of_service = input->flow.ip4_flow.tos, }; - attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ + attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ .src_addr = mask->ipv4_mask.src_ip, .dst_addr = mask->ipv4_mask.dst_ip, .time_to_live = mask->ipv4_mask.ttl, @@ -2557,7 +3057,7 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: - attributes->l3.ipv6.hdr = (struct ipv6_hdr){ + attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ .hop_limits = input->flow.ipv6_flow.hop_limits, .proto = input->flow.ipv6_flow.proto, }; @@ -2589,11 +3089,11 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, /* Handle L4. 
*/ switch (fdir_filter->input.flow_type) { case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: - attributes->l4.udp.hdr = (struct udp_hdr){ + attributes->l4.udp.hdr = (struct rte_udp_hdr){ .src_port = input->flow.udp4_flow.src_port, .dst_port = input->flow.udp4_flow.dst_port, }; - attributes->l4_mask.udp.hdr = (struct udp_hdr){ + attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ .src_port = mask->src_port_mask, .dst_port = mask->dst_port_mask, }; @@ -2604,11 +3104,11 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, }; break; case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: - attributes->l4.tcp.hdr = (struct tcp_hdr){ + attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ .src_port = input->flow.tcp4_flow.src_port, .dst_port = input->flow.tcp4_flow.dst_port, }; - attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ + attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ .src_port = mask->src_port_mask, .dst_port = mask->dst_port_mask, }; @@ -2619,11 +3119,11 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: - attributes->l4.udp.hdr = (struct udp_hdr){ + attributes->l4.udp.hdr = (struct rte_udp_hdr){ .src_port = input->flow.udp6_flow.src_port, .dst_port = input->flow.udp6_flow.dst_port, }; - attributes->l4_mask.udp.hdr = (struct udp_hdr){ + attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ .src_port = mask->src_port_mask, .dst_port = mask->dst_port_mask, }; @@ -2634,11 +3134,11 @@ flow_fdir_filter_convert(struct rte_eth_dev *dev, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: - attributes->l4.tcp.hdr = (struct tcp_hdr){ + attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ .src_port = input->flow.tcp6_flow.src_port, .dst_port = input->flow.tcp6_flow.dst_port, }; - attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ + attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ .src_port = mask->src_port_mask, .dst_port = mask->dst_port_mask, }; @@ -2757,7 +3257,8 @@ flow_fdir_filter_add(struct rte_eth_dev *dev, goto error; } flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr, - fdir_flow->items, fdir_flow->actions, NULL); + fdir_flow->items, fdir_flow->actions, true, + NULL); if (!flow) goto error; assert(!flow->fdir); @@ -2958,3 +3459,186 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, } return 0; } + +#define MLX5_POOL_QUERY_FREQ_US 1000000 + +/** + * Set the periodic procedure for triggering asynchronous batch queries for all + * the counter pools. + * + * @param[in] sh + * Pointer to mlx5_ibv_shared object. + */ +void +mlx5_set_query_alarm(struct mlx5_ibv_shared *sh) +{ + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0); + uint32_t pools_n = rte_atomic16_read(&cont->n_valid); + uint32_t us; + + cont = MLX5_CNT_CONTAINER(sh, 1, 0); + pools_n += rte_atomic16_read(&cont->n_valid); + us = MLX5_POOL_QUERY_FREQ_US / pools_n; + DRV_LOG(DEBUG, "Set alarm for %u pools each %u us\n", pools_n, us); + if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { + sh->cmng.query_thread_on = 0; + DRV_LOG(ERR, "Cannot reinitialize query alarm\n"); + } else { + sh->cmng.query_thread_on = 1; + } +} + +/** + * The periodic procedure for triggering asynchronous batch queries for all the + * counter pools. This function is probably called by the host thread. + * + * @param[in] arg + * The parameter for the alarm process. 
+ */ +void +mlx5_flow_query_alarm(void *arg) +{ + struct mlx5_ibv_shared *sh = arg; + struct mlx5_devx_obj *dcs; + uint16_t offset; + int ret; + uint8_t batch = sh->cmng.batch; + uint16_t pool_index = sh->cmng.pool_index; + struct mlx5_pools_container *cont; + struct mlx5_pools_container *mcont; + struct mlx5_flow_counter_pool *pool; + + if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) + goto set_alarm; +next_container: + cont = MLX5_CNT_CONTAINER(sh, batch, 1); + mcont = MLX5_CNT_CONTAINER(sh, batch, 0); + /* Check if resize was done and need to flip a container. */ + if (cont != mcont) { + if (cont->pools) { + /* Clean the old container. */ + rte_free(cont->pools); + memset(cont, 0, sizeof(*cont)); + } + rte_cio_wmb(); + /* Flip the host container. */ + sh->cmng.mhi[batch] ^= (uint8_t)2; + cont = mcont; + } + if (!cont->pools) { + /* 2 empty containers case is unexpected. */ + if (unlikely(batch != sh->cmng.batch)) + goto set_alarm; + batch ^= 0x1; + pool_index = 0; + goto next_container; + } + pool = cont->pools[pool_index]; + if (pool->raw_hw) + /* There is a pool query in progress. */ + goto set_alarm; + pool->raw_hw = + LIST_FIRST(&sh->cmng.free_stat_raws); + if (!pool->raw_hw) + /* No free counter statistics raw memory. */ + goto set_alarm; + dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read + (&pool->a64_dcs); + offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; + ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - + offset, NULL, NULL, + pool->raw_hw->mem_mng->dm->id, + (void *)(uintptr_t) + (pool->raw_hw->data + offset), + sh->devx_comp, + (uint64_t)(uintptr_t)pool); + if (ret) { + DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" + " %d\n", pool->min_dcs->id); + pool->raw_hw = NULL; + goto set_alarm; + } + pool->raw_hw->min_dcs_id = dcs->id; + LIST_REMOVE(pool->raw_hw, next); + sh->cmng.pending_queries++; + pool_index++; + if (pool_index >= rte_atomic16_read(&cont->n_valid)) { + batch ^= 0x1; + pool_index = 0; + } +set_alarm: + sh->cmng.batch = batch; + sh->cmng.pool_index = pool_index; + mlx5_set_query_alarm(sh); +} + +/** + * Handler for the HW respond about ready values from an asynchronous batch + * query. This function is probably called by the host thread. + * + * @param[in] sh + * The pointer to the shared IB device context. + * @param[in] async_id + * The Devx async ID. + * @param[in] status + * The status of the completion. + */ +void +mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, + uint64_t async_id, int status) +{ + struct mlx5_flow_counter_pool *pool = + (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; + struct mlx5_counter_stats_raw *raw_to_free; + + if (unlikely(status)) { + raw_to_free = pool->raw_hw; + } else { + raw_to_free = pool->raw; + rte_spinlock_lock(&pool->sl); + pool->raw = pool->raw_hw; + rte_spinlock_unlock(&pool->sl); + rte_atomic64_add(&pool->query_gen, 1); + /* Be sure the new raw counters data is updated in memory. */ + rte_cio_wmb(); + } + LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); + pool->raw_hw = NULL; + sh->cmng.pending_queries--; +} + +/** + * Translate the rte_flow group index to HW table value. + * + * @param[in] attributes + * Pointer to flow attributes + * @param[in] external + * Value is part of flow rule created by request external to PMD. + * @param[in] group + * rte_flow group index value. + * @param[out] table + * HW table value. + * @param[out] error + * Pointer to error structure. 
+ * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, + uint32_t group, uint32_t *table, + struct rte_flow_error *error) +{ + if (attributes->transfer && external) { + if (group == UINT32_MAX) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "group index not supported"); + *table = group + 1; + } else { + *table = group; + } + return 0; +}
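
Below is an illustrative sketch, not part of the patch above, of how an application could exercise the new GRE key validation path (mlx5_flow_validate_item_gre_key()) through the public rte_flow API. The port id argument, the queue index and the key value are assumptions chosen for the example, not values taken from the patch.

/*
 * Illustrative only: validate ETH / IPV4 / GRE (K bit set) / GRE_KEY.
 * The GRE spec carries the key-present bit (0x2000) so that the new
 * GRE key validation accepts the following GRE_KEY item.
 */
#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

int
validate_gre_key_rule(uint16_t port_id)
{
	struct rte_flow_error error;
	const struct rte_flow_attr attr = { .ingress = 1 };
	/* GRE header with the K (key present) bit set. */
	const struct rte_flow_item_gre gre_spec = {
		.c_rsvd0_ver = RTE_BE16(0x2000),
	};
	const struct rte_flow_item_gre gre_mask = {
		.c_rsvd0_ver = RTE_BE16(0x2000),
	};
	/* 32-bit GRE key, matched exactly (value is arbitrary here). */
	const rte_be32_t key_spec = RTE_BE32(0x12345678);
	const rte_be32_t key_mask = RTE_BE32(UINT32_MAX);
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{
			.type = RTE_FLOW_ITEM_TYPE_GRE,
			.spec = &gre_spec,
			.mask = &gre_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
			.spec = &key_spec,
			.mask = &key_mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	/* Queue index 0 is assumed to exist on the port. */
	const struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_validate(port_id, &attr, pattern, actions, &error);
}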