net/mlx5: support MPLS-in-GRE and MPLS-in-UDP
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
index 093e3a2..3af9524 100644 (file)
@@ -18,6 +18,7 @@
 #endif
 
 #include <rte_common.h>
+#include <rte_ether.h>
 #include <rte_eth_ctrl.h>
 #include <rte_ethdev_driver.h>
 #include <rte_flow.h>
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
 
-/* Define minimal priority for control plane flows. */
-#define MLX5_CTRL_FLOW_PRIORITY 4
+/* Flow priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 1
 
 /* Internet Protocol versions. */
 #define MLX5_IPV4 4
 #define MLX5_IPV6 6
+#define MLX5_GRE 47
 
 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
 struct ibv_flow_spec_counter_action {
@@ -49,6 +51,7 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
 /** Structure give to the conversion functions. */
 struct mlx5_flow_data {
+       struct rte_eth_dev *dev; /** Ethernet device. */
        struct mlx5_flow_parse *parser; /** Parser context. */
        struct rte_flow_error *error; /** Error context. */
 };
@@ -88,6 +91,21 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       struct mlx5_flow_data *data);
 
+static int
+mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
+                          const void *default_mask,
+                          struct mlx5_flow_data *data);
+
+static int
+mlx5_flow_create_gre(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    struct mlx5_flow_data *data);
+
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     struct mlx5_flow_data *data);
+
 struct mlx5_flow_parse;
 
 static void
@@ -109,6 +127,7 @@ enum hash_rxq_type {
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
+       HASH_RXQ_TUNNEL,
 };
 
 /* Initialization data for hash RX queue. */
@@ -127,7 +146,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-               .flow_priority = 1,
+               .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
@@ -136,7 +155,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-               .flow_priority = 1,
+               .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
@@ -144,7 +163,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
-               .flow_priority = 2,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
@@ -153,7 +172,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-               .flow_priority = 1,
+               .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
@@ -162,7 +181,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-               .flow_priority = 1,
+               .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
@@ -170,13 +189,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
-               .flow_priority = 2,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
-               .flow_priority = 3,
+               .flow_priority = 2,
        },
 };
 
@@ -218,6 +237,7 @@ struct rte_flow {
        struct rte_flow_action_rss rss_conf; /**< RSS configuration */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
+       uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
@@ -230,6 +250,33 @@ struct rte_flow {
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
 
+#define IS_TUNNEL(type) ( \
+       (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
+       (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
+       (type) == RTE_FLOW_ITEM_TYPE_GRE || \
+       (type) == RTE_FLOW_ITEM_TYPE_MPLS)
+
+const uint32_t flow_ptype[] = {
+       [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
+       [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
+       [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
+       [RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
+};
+
+#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
+
+const uint32_t ptype_ext[] = { /* Indexed by PTYPE_IDX(RTE_PTYPE_TUNNEL_*). */
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
+                                             RTE_PTYPE_L4_UDP,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
+                                                 RTE_PTYPE_L4_UDP,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
+               RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
+       [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
+               RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
+};
+
 /** Structure to generate a simple graph of layers supported by the NIC. */
 struct mlx5_flow_items {
        /** List of possible actions for these items. */
@@ -283,7 +330,9 @@ static const enum rte_flow_action_type valid_actions[] = {
 static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
-                              RTE_FLOW_ITEM_TYPE_VXLAN),
+                              RTE_FLOW_ITEM_TYPE_VXLAN,
+                              RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+                              RTE_FLOW_ITEM_TYPE_GRE),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
@@ -306,6 +355,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
+                       .inner_type = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
@@ -314,7 +364,8 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
-                              RTE_FLOW_ITEM_TYPE_TCP),
+                              RTE_FLOW_ITEM_TYPE_TCP,
+                              RTE_FLOW_ITEM_TYPE_GRE),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
@@ -331,7 +382,8 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
-                              RTE_FLOW_ITEM_TYPE_TCP),
+                              RTE_FLOW_ITEM_TYPE_TCP,
+                              RTE_FLOW_ITEM_TYPE_GRE),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
@@ -358,7 +410,9 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
-               .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
+                              RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+                              RTE_FLOW_ITEM_TYPE_MPLS),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
@@ -384,8 +438,43 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
+       [RTE_FLOW_ITEM_TYPE_GRE] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6,
+                              RTE_FLOW_ITEM_TYPE_MPLS),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_gre){
+                       .protocol = -1,
+               },
+               .default_mask = &rte_flow_item_gre_mask,
+               .mask_sz = sizeof(struct rte_flow_item_gre),
+               .convert = mlx5_flow_create_gre,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+               .dst_sz = sizeof(struct ibv_flow_spec_gre),
+#else
+               .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
+#endif
+       },
+       [RTE_FLOW_ITEM_TYPE_MPLS] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_mpls){
+                       .label_tc_s = "\xff\xff\xf0",
+               },
+               .default_mask = &rte_flow_item_mpls_mask,
+               .mask_sz = sizeof(struct rte_flow_item_mpls),
+               .convert = mlx5_flow_create_mpls,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+               .dst_sz = sizeof(struct ibv_flow_spec_mpls),
+#endif
+       },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
-               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
+                              RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
@@ -395,11 +484,24 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
+       [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
+               .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+                              RTE_FLOW_ITEM_TYPE_IPV4,
+                              RTE_FLOW_ITEM_TYPE_IPV6),
+               .actions = valid_actions,
+               .mask = &(const struct rte_flow_item_vxlan_gpe){
+                       .vni = "\xff\xff\xff",
+               },
+               .default_mask = &rte_flow_item_vxlan_gpe_mask,
+               .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
+               .convert = mlx5_flow_create_vxlan_gpe,
+               .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
+       },
 };
 
 /** Structure to pass to the conversion function. */
 struct mlx5_flow_parse {
-       uint32_t inner; /**< Set once VXLAN is encountered. */
+       uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
@@ -410,12 +512,15 @@ struct mlx5_flow_parse {
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
+       enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
+       uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
+               uint64_t hash_fields; /**< Verbs hash fields. */
        } queue[RTE_DIM(hash_rxq_init)];
 };
 
@@ -465,7 +570,7 @@ struct ibv_spec_header {
 };
 
 /**
- * Check support for a given item.
+ * Check item is fully supported by the NIC matching capability.
  *
  * @param item[in]
  *   Item specification.
@@ -482,60 +587,33 @@ static int
 mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
 {
-       if (!item->spec && (item->mask || item->last)) {
-               rte_errno = EINVAL;
-               return -rte_errno;
-       }
-       if (item->spec && !item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->spec;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i]) {
-                               rte_errno = EINVAL;
-                               return -rte_errno;
-                       }
-       }
-       if (item->last && !item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->last;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i]) {
-                               rte_errno = EINVAL;
-                               return -rte_errno;
-                       }
-       }
-       if (item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->spec;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i]) {
-                               rte_errno = EINVAL;
-                               return -rte_errno;
-                       }
-       }
-       if (item->spec && item->last) {
-               uint8_t spec[size];
-               uint8_t last[size];
-               const uint8_t *apply = mask;
-               unsigned int i;
-               int ret;
+       unsigned int i;
+       const uint8_t *spec = item->spec;
+       const uint8_t *last = item->last;
+       const uint8_t *m = item->mask ? item->mask : mask;
 
-               if (item->mask)
-                       apply = item->mask;
-               for (i = 0; i < size; ++i) {
-                       spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
-                       last[i] = ((const uint8_t *)item->last)[i] & apply[i];
-               }
-               ret = memcmp(spec, last, size);
-               if (ret != 0) {
-                       rte_errno = EINVAL;
-                       return -rte_errno;
-               }
+       if (!spec && (item->mask || last))
+               goto error;
+       if (!spec)
+               return 0;
+       /*
+        * Single pass over the bytes selected by m (item->mask, or the
+        * mask argument when item->mask is NULL) to make sure that:
+        * - m sets no bit outside the supported mask argument.
+        * - Masked item->spec and item->last are equal (no range support).
+        */
+       for (i = 0; i < size; i++) {
+               if (!m[i])
+                       continue;
+               if ((m[i] | mask[i]) != mask[i])
+                       goto error;
+               if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
+                       goto error;
        }
        return 0;
+error:
+       rte_errno = ENOTSUP;
+       return -rte_errno;
 }
 
 /**
@@ -574,6 +652,13 @@ mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
                                   "egress is not supported");
                return -rte_errno;
        }
+       if (attr->transfer) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+                                  NULL,
+                                  "transfer is not supported");
+               return -rte_errno;
+       }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
@@ -652,6 +737,24 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                                                   " function is Toeplitz");
                                return -rte_errno;
                        }
+#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+                       if (parser->rss_conf.level > 1) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "a nonzero RSS encapsulation"
+                                                  " level is not supported");
+                               return -rte_errno;
+                       }
+#endif
+                       if (parser->rss_conf.level > 2) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "RSS encapsulation level"
+                                                  " > 1 is not supported");
+                               return -rte_errno;
+                       }
                        if (rss->types & MLX5_RSS_HF_MASK) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
@@ -702,6 +805,7 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                        }
                        parser->rss_conf = (struct rte_flow_action_rss){
                                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+                               .level = rss->level,
                                .types = rss->types,
                                .key_len = rss_key_len,
                                .queue_num = rss->queue_num,
@@ -785,12 +889,15 @@ exit_action_overlap:
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
+mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
+                                const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
 {
+       struct priv *priv = dev->data->dev_private;
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
+       unsigned int last_voids = 0;
        int ret = 0;
 
        /* Initialise the offsets to start after verbs attribute. */
@@ -800,8 +907,10 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
 
-               if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+               if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
+                       last_voids++;
                        continue;
+               }
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
@@ -821,16 +930,38 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
                                              cur_item->mask_sz);
                if (ret)
                        goto exit_item_not_supported;
-               if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
-                       if (parser->inner) {
+               if (IS_TUNNEL(items->type)) {
+                       if (parser->tunnel &&
+                           !((items - last_voids - 1)->type ==
+                             RTE_FLOW_ITEM_TYPE_GRE && items->type ==
+                             RTE_FLOW_ITEM_TYPE_MPLS)) {
+                               rte_flow_error_set(error, ENOTSUP,
+                                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                                  items,
+                                                  "Cannot recognize multiple"
+                                                  " tunnel encapsulations.");
+                               return -rte_errno;
+                       }
+                       if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
+                           !priv->config.mpls_en) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
-                                                  "cannot recognize multiple"
-                                                  " VXLAN encapsulations");
+                                                  "MPLS not supported or"
+                                                  " disabled in firmware"
+                                                  " configuration.");
+                               return -rte_errno;
+                       }
+                       if (!priv->config.tunnel_en &&
+                           parser->rss_conf.level > 1) {
+                               rte_flow_error_set(error, ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       items,
+                                       "RSS on tunnel is not supported");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
+                       parser->tunnel = flow_ptype[items->type];
                }
                if (parser->drop) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
@@ -838,6 +969,7 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
+               last_voids = 0;
        }
        if (parser->drop) {
                parser->queue[HASH_RXQ_ETH].offset +=
@@ -891,30 +1023,50 @@ mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
  * Make inner packet matching with an higher priority from the non Inner
  * matching.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in, out] parser
  *   Internal parser structure.
  * @param attr
  *   User flow attribute.
  */
 static void
-mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
+mlx5_flow_update_priority(struct rte_eth_dev *dev,
+                         struct mlx5_flow_parse *parser,
                          const struct rte_flow_attr *attr)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
+       uint16_t priority;
 
+       /*                      8 priorities    >= 16 priorities
+        * Control flow:        4-7             8-15
+        * User normal flow:    1-3             4-7
+        * User tunnel flow:    0-2             0-3
+        */
+       priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
+       if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
+               priority /= 2;
+       /*
+        * Rank non-tunnel flows below tunnel ones: add 1 to the Verbs priority
+        * with only 8 Verbs priorities, MLX5_VERBS_FLOW_PRIO_8 / 2 otherwise.
+        */
+       if (!parser->inner) {
+               if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
+                       priority += 1;
+               else
+                       priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
+       }
        if (parser->drop) {
-               parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
-                       attr->priority +
-                       hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+               parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
+                               hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                return;
        }
        for (i = 0; i != hash_rxq_init_n; ++i) {
-               if (parser->queue[i].ibv_attr) {
-                       parser->queue[i].ibv_attr->priority =
-                               attr->priority +
-                               hash_rxq_init[i].flow_priority -
-                               (parser->inner ? 1 : 0);
-               }
+               if (!parser->queue[i].ibv_attr)
+                       continue;
+               parser->queue[i].ibv_attr->priority = priority +
+                               hash_rxq_init[i].flow_priority;
        }
 }
 
@@ -927,59 +1079,12 @@ mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
 static void
 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 {
-       const unsigned int ipv4 =
-               hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
-       const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
-       const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
-       const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
-       const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
-       const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;
+       uint32_t inner = parser->inner;
 
-       /* Remove any other flow not matching the pattern. */
-       if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
-               for (i = 0; i != hash_rxq_init_n; ++i) {
-                       if (i == HASH_RXQ_ETH)
-                               continue;
-                       rte_free(parser->queue[i].ibv_attr);
-                       parser->queue[i].ibv_attr = NULL;
-               }
-               return;
-       }
-       if (parser->layer == HASH_RXQ_ETH) {
-               goto fill;
-       } else {
-               /*
-                * This layer becomes useless as the pattern define under
-                * layers.
-                */
-               rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
-               parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
-       }
-       /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
-       for (i = ohmin; i != (ohmax + 1); ++i) {
-               if (!parser->queue[i].ibv_attr)
-                       continue;
-               rte_free(parser->queue[i].ibv_attr);
-               parser->queue[i].ibv_attr = NULL;
-       }
-       /* Remove impossible flow according to the RSS configuration. */
-       if (hash_rxq_init[parser->layer].dpdk_rss_hf &
-           parser->rss_conf.types) {
-               /* Remove any other flow. */
-               for (i = hmin; i != (hmax + 1); ++i) {
-                       if ((i == parser->layer) ||
-                            (!parser->queue[i].ibv_attr))
-                               continue;
-                       rte_free(parser->queue[i].ibv_attr);
-                       parser->queue[i].ibv_attr = NULL;
-               }
-       } else  if (!parser->queue[ip].ibv_attr) {
-               /* no RSS possible with the current configuration. */
-               parser->rss_conf.queue_num = 1;
+       /* Don't create extra flows for outer RSS. */
+       if (parser->tunnel && parser->rss_conf.level < 2)
                return;
-       }
-fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
@@ -990,23 +1095,25 @@ fill:
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
+                       struct ibv_flow_spec_eth eth;
                } specs;
                void *dst;
                uint16_t size;
 
                if (i == parser->layer)
                        continue;
-               if (parser->layer == HASH_RXQ_ETH) {
+               if (parser->layer == HASH_RXQ_ETH ||
+                   parser->layer == HASH_RXQ_TUNNEL) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
-                                       .type = IBV_FLOW_SPEC_IPV4_EXT,
+                                       .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
-                                       .type = IBV_FLOW_SPEC_IPV6,
+                                       .type = inner | IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
@@ -1023,7 +1130,7 @@ fill:
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
-                               .type = ((i == HASH_RXQ_UDPV4 ||
+                               .type = inner | ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
@@ -1041,6 +1148,109 @@ fill:
        }
 }
 
+/**
+ * Select per-queue hash fields and drop hash types unused by the RSS config.
+ *
+ * @param[in, out] parser
+ *   Internal parser structure.
+ *
+ * @return
+ *   0 in all cases (no failure path in the current implementation).
+ */
+static int
+mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
+{
+       unsigned int i;
+       enum hash_rxq_type start;
+       enum hash_rxq_type layer;
+       int outer = parser->tunnel && parser->rss_conf.level < 2;
+       uint64_t rss = parser->rss_conf.types;
+
+       /* Default an unset (0) RSS level to 1, i.e. outer RSS. */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 1;
+       layer = outer ? parser->out_layer : parser->layer;
+       if (layer == HASH_RXQ_TUNNEL)
+               layer = HASH_RXQ_ETH;
+       if (outer) {
+               /* Only one hash type for outer RSS. */
+               if (rss && layer == HASH_RXQ_ETH) {
+                       start = HASH_RXQ_TCPV4;
+               } else if (rss && layer != HASH_RXQ_ETH &&
+                          !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
+                       /* If RSS does not match the L4 pattern, try L3 RSS. */
+                       if (layer < HASH_RXQ_IPV4)
+                               layer = HASH_RXQ_IPV4;
+                       else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
+                               layer = HASH_RXQ_IPV6;
+                       start = layer;
+               } else {
+                       start = layer;
+               }
+               /* Scan for the first valid hash type. */
+               for (i = start; rss && i <= layer; ++i) {
+                       if (!parser->queue[i].ibv_attr)
+                               continue;
+                       if (hash_rxq_init[i].dpdk_rss_hf & rss)
+                               break;
+               }
+               if (rss && i <= layer)
+                       parser->queue[layer].hash_fields =
+                                       hash_rxq_init[i].hash_fields;
+               /* Trim unused hash types. */
+               for (i = 0; i != hash_rxq_init_n; ++i) {
+                       if (parser->queue[i].ibv_attr && i != layer) {
+                               rte_free(parser->queue[i].ibv_attr);
+                               parser->queue[i].ibv_attr = NULL;
+                       }
+               }
+       } else {
+               /* Expand for inner or normal RSS. */
+               if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
+                       start = HASH_RXQ_TCPV4;
+               else if (rss && layer == HASH_RXQ_IPV6)
+                       start = HASH_RXQ_TCPV6;
+               else
+                       start = layer;
+               /* For L4 pattern, try L3 RSS if no L4 RSS. */
+               /* Trim unused hash types. */
+               for (i = 0; i != hash_rxq_init_n; ++i) {
+                       if (!parser->queue[i].ibv_attr)
+                               continue;
+                       if (i < start || i > layer) {
+                               rte_free(parser->queue[i].ibv_attr);
+                               parser->queue[i].ibv_attr = NULL;
+                               continue;
+                       }
+                       if (!rss)
+                               continue;
+                       if (hash_rxq_init[i].dpdk_rss_hf & rss) {
+                               parser->queue[i].hash_fields =
+                                               hash_rxq_init[i].hash_fields;
+                       } else if (i != layer) {
+                               /* Remove unused RSS expansion. */
+                               rte_free(parser->queue[i].ibv_attr);
+                               parser->queue[i].ibv_attr = NULL;
+                       } else if (layer < HASH_RXQ_IPV4 &&
+                                  (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
+                                   rss)) {
+                               /* Allow IPv4 RSS on L4 pattern. */
+                               parser->queue[i].hash_fields =
+                                       hash_rxq_init[HASH_RXQ_IPV4]
+                                               .hash_fields;
+                       } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
+                                  (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
+                                   rss)) {
+                               /* Allow IPv6 RSS on L4 pattern. */
+                               parser->queue[i].hash_fields =
+                                       hash_rxq_init[HASH_RXQ_IPV6]
+                                               .hash_fields;
+                       }
+               }
+       }
+       return 0;
+}
+
 /**
  * Validate and convert a flow supported by the NIC.
  *
@@ -1084,7 +1294,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
        ret = mlx5_flow_convert_actions(dev, actions, error, parser);
        if (ret)
                return ret;
-       ret = mlx5_flow_convert_items_validate(items, error, parser);
+       ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
        if (ret)
                return ret;
        mlx5_flow_convert_finalise(parser);
@@ -1105,10 +1315,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int offset;
 
-                       if (!(parser->rss_conf.types &
-                             hash_rxq_init[i].dpdk_rss_hf) &&
-                           (i != HASH_RXQ_ETH))
-                               continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                mlx5_flow_convert_allocate(offset, error);
@@ -1119,8 +1325,11 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
        }
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
+       parser->tunnel = 0;
+       parser->layer = HASH_RXQ_ETH;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                struct mlx5_flow_data data = {
+                       .dev = dev,
                        .parser = parser,
                        .error = error,
                };
@@ -1136,6 +1345,15 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
                if (ret)
                        goto exit_free;
        }
+       if (!parser->drop) {
+               /* RSS check, remove unused hash types. */
+               ret = mlx5_flow_convert_rss(parser);
+               if (ret)
+                       goto exit_free;
+               /* Complete missing specification. */
+               mlx5_flow_convert_finalise(parser);
+       }
+       mlx5_flow_update_priority(dev, parser, attr);
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
@@ -1143,13 +1361,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
                if (!parser->cs)
                        goto exit_count_error;
        }
-       /*
-        * Last step. Complete missing specification to reach the RSS
-        * configuration.
-        */
-       if (!parser->drop)
-               mlx5_flow_convert_finalise(parser);
-       mlx5_flow_update_priority(parser, attr);
 exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
@@ -1197,17 +1408,11 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
-               /* Specification must be the same l3 type or none. */
-               if (parser->layer == HASH_RXQ_ETH ||
-                   (hash_rxq_init[parser->layer].ip_version ==
-                    hash_rxq_init[i].ip_version) ||
-                   (hash_rxq_init[i].ip_version == 0)) {
-                       dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
-                                       parser->queue[i].offset);
-                       memcpy(dst, src, size);
-                       ++parser->queue[i].ibv_attr->num_of_specs;
-                       parser->queue[i].offset += size;
-               }
+               dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+                               parser->queue[i].offset);
+               memcpy(dst, src, size);
+               ++parser->queue[i].ibv_attr->num_of_specs;
+               parser->queue[i].offset += size;
        }
 }
 
@@ -1238,9 +1443,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
                .size = eth_size,
        };
 
-       /* Don't update layer for the inner pattern. */
-       if (!parser->inner)
-               parser->layer = HASH_RXQ_ETH;
+       parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;
 
@@ -1286,6 +1489,7 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
        struct mlx5_flow_parse *parser = data->parser;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+       const char *msg = "VLAN cannot be empty";
 
        if (spec) {
                unsigned int i;
@@ -1307,12 +1511,20 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
                         */
                        if (!eth->mask.vlan_tag)
                                goto error;
+                       /* Outer TPID cannot be matched. */
+                       if (eth->mask.ether_type) {
+                               msg = "VLAN TPID matching is not supported";
+                               goto error;
+                       }
+                       eth->val.ether_type = spec->inner_type;
+                       eth->mask.ether_type = mask->inner_type;
+                       eth->val.ether_type &= eth->mask.ether_type;
                }
                return 0;
        }
 error:
        return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
-                                 item, "VLAN cannot be empty");
+                                 item, msg);
 }
 
 /**
@@ -1333,6 +1545,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data)
 {
+       struct priv *priv = data->dev->data->dev_private;
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *parser = data->parser;
@@ -1342,9 +1555,16 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                .size = ipv4_size,
        };
 
-       /* Don't update layer for the inner pattern. */
-       if (!parser->inner)
-               parser->layer = HASH_RXQ_IPV4;
+       if (parser->layer == HASH_RXQ_TUNNEL &&
+           parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
+           !priv->config.l3_vxlan_en)
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "L3 VXLAN not enabled by device"
+                                         " parameter and/or not configured"
+                                         " in firmware");
+       parser->layer = HASH_RXQ_IPV4;
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1388,6 +1608,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data)
 {
+       struct priv *priv = data->dev->data->dev_private;
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *parser = data->parser;
@@ -1397,9 +1618,16 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                .size = ipv6_size,
        };
 
-       /* Don't update layer for the inner pattern. */
-       if (!parser->inner)
-               parser->layer = HASH_RXQ_IPV6;
+       if (parser->layer == HASH_RXQ_TUNNEL &&
+           parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
+           !priv->config.l3_vxlan_en)
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "L3 VXLAN not enabled by device"
+                                         " parameter and/or not configured"
+                                         " in firmware");
+       parser->layer = HASH_RXQ_IPV6;
        if (spec) {
                unsigned int i;
                uint32_t vtc_flow_val;
@@ -1472,13 +1700,10 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
                .size = udp_size,
        };
 
-       /* Don't update layer for the inner pattern. */
-       if (!parser->inner) {
-               if (parser->layer == HASH_RXQ_IPV4)
-                       parser->layer = HASH_RXQ_UDPV4;
-               else
-                       parser->layer = HASH_RXQ_UDPV6;
-       }
+       if (parser->layer == HASH_RXQ_IPV4)
+               parser->layer = HASH_RXQ_UDPV4;
+       else
+               parser->layer = HASH_RXQ_UDPV6;
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1521,13 +1746,10 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
                .size = tcp_size,
        };
 
-       /* Don't update layer for the inner pattern. */
-       if (!parser->inner) {
-               if (parser->layer == HASH_RXQ_IPV4)
-                       parser->layer = HASH_RXQ_TCPV4;
-               else
-                       parser->layer = HASH_RXQ_TCPV6;
-       }
+       if (parser->layer == HASH_RXQ_IPV4)
+               parser->layer = HASH_RXQ_TCPV4;
+       else
+               parser->layer = HASH_RXQ_TCPV6;
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1576,6 +1798,12 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 
        id.vni[0] = 0;
        parser->inner = IBV_FLOW_SPEC_INNER;
+       parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
+       parser->out_layer = parser->layer;
+       parser->layer = HASH_RXQ_TUNNEL;
+       /* Default VXLAN to outer RSS. */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 1;
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1594,7 +1822,8 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
         * before will also match this rule.
         * To avoid such situation, VNI 0 is currently refused.
         */
-       if (!vxlan.val.tunnel_id)
+       /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
+       if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
                return rte_flow_error_set(data->error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
@@ -1603,6 +1832,252 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
        return 0;
 }
 
+/**
+ * Convert VXLAN-GPE item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   Conversion context (see struct mlx5_flow_data).
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
+                          const void *default_mask,
+                          struct mlx5_flow_data *data)
+{
+       struct priv *priv = data->dev->data->dev_private;
+       const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+       const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+       struct mlx5_flow_parse *parser = data->parser;
+       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+       struct ibv_flow_spec_tunnel vxlan = {
+               .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id;
+
+       if (!priv->config.l3_vxlan_en)
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "L3 VXLAN not enabled by device"
+                                         " parameter and/or not configured"
+                                         " in firmware");
+       id.vni[0] = 0; /* Bytes 1-3 are filled from the 3-byte VNI below. */
+       parser->inner = IBV_FLOW_SPEC_INNER;
+       parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
+       parser->out_layer = parser->layer;
+       parser->layer = HASH_RXQ_TUNNEL;
+       /* Default an unset (0) RSS level to 1, i.e. outer RSS. */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 1;
+       if (spec) {
+               if (!mask)
+                       mask = default_mask;
+               memcpy(&id.vni[1], spec->vni, 3);
+               vxlan.val.tunnel_id = id.vlan_id;
+               memcpy(&id.vni[1], mask->vni, 3);
+               vxlan.mask.tunnel_id = id.vlan_id;
+               if (spec->protocol)
+                       return rte_flow_error_set(data->error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 item,
+                                                 "VxLAN-GPE protocol not"
+                                                 " supported");
+               /* Remove unwanted bits from values. */
+               vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+       }
+       /*
+        * Tunnel id 0 is equivalent to not adding a VXLAN layer, if only this
+        * layer is defined in the Verbs specification it is interpreted as
+        * wildcard and all packets will match this rule, if it follows a full
+        * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
+        * before will also match this rule.
+        * To avoid such a situation, VNI 0 is refused in the case below.
+        */
+       /* VNI 0 is only accepted when out_layer is not HASH_RXQ_ETH. */
+       if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "VxLAN-GPE vni cannot be 0");
+       mlx5_flow_create_copy(parser, &vxlan, size);
+       return 0;
+}
+
+/**
+ * Convert GRE item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   Conversion context (see struct mlx5_flow_data).
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_gre(const struct rte_flow_item *item,
+                    const void *default_mask,
+                    struct mlx5_flow_data *data)
+{
+       struct mlx5_flow_parse *parser = data->parser;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       (void)default_mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+       struct ibv_flow_spec_tunnel tunnel = {
+               .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+#else
+       const struct rte_flow_item_gre *spec = item->spec;
+       const struct rte_flow_item_gre *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_gre);
+       struct ibv_flow_spec_gre tunnel = {
+               .type = parser->inner | IBV_FLOW_SPEC_GRE,
+               .size = size,
+       };
+#endif
+       struct ibv_flow_spec_ipv4_ext *ipv4;
+       struct ibv_flow_spec_ipv6 *ipv6;
+       unsigned int i;
+
+       parser->inner = IBV_FLOW_SPEC_INNER;
+       parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
+       parser->out_layer = parser->layer;
+       parser->layer = HASH_RXQ_TUNNEL;
+       /* Default an unset (0) RSS level to 2, i.e. inner RSS for GRE. */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 2;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       if (spec) {
+               if (!mask)
+                       mask = default_mask;
+               tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+               tunnel.val.protocol = spec->protocol;
+               tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+               tunnel.mask.protocol = mask->protocol;
+               /* Remove unwanted bits from values. */
+               tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+               tunnel.val.protocol &= tunnel.mask.protocol;
+               tunnel.val.key &= tunnel.mask.key;
+       }
+#endif
+       /* Set protocol GRE in the IP spec ending at each queue's offset. */
+       for (i = 0; i != hash_rxq_init_n; ++i) {
+               if (!parser->queue[i].ibv_attr)
+                       continue;
+               if (parser->out_layer == HASH_RXQ_IPV4) {
+                       ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+                               parser->queue[i].offset -
+                               sizeof(struct ibv_flow_spec_ipv4_ext));
+                       if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
+                               break; /* Conflicting protocol match. */
+                       ipv4->val.proto = MLX5_GRE;
+                       ipv4->mask.proto = 0xff;
+               } else if (parser->out_layer == HASH_RXQ_IPV6) {
+                       ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+                               parser->queue[i].offset -
+                               sizeof(struct ibv_flow_spec_ipv6));
+                       if (ipv6->mask.next_hdr &&
+                           ipv6->val.next_hdr != MLX5_GRE)
+                               break; /* Conflicting next header match. */
+                       ipv6->val.next_hdr = MLX5_GRE;
+                       ipv6->mask.next_hdr = 0xff;
+               }
+       }
+       if (i != hash_rxq_init_n)
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "IP protocol of GRE must be 47");
+       mlx5_flow_create_copy(parser, &tunnel, size);
+       return 0;
+}
+
+/**
+ * Convert MPLS item to Verbs specification.
+ * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   Conversion context (see struct mlx5_flow_data).
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+                     const void *default_mask,
+                     struct mlx5_flow_data *data)
+{
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       (void)default_mask;
+       return rte_flow_error_set(data->error, ENOTSUP,
+                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                 item,
+                                 "MPLS is not supported by driver");
+#else
+       const struct rte_flow_item_mpls *spec = item->spec;
+       const struct rte_flow_item_mpls *mask = item->mask;
+       struct mlx5_flow_parse *parser = data->parser;
+       unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+       struct ibv_flow_spec_mpls mpls = {
+               .type = IBV_FLOW_SPEC_MPLS,
+               .size = size,
+       };
+
+       parser->inner = IBV_FLOW_SPEC_INNER;
+       if (parser->layer == HASH_RXQ_UDPV4 ||
+           parser->layer == HASH_RXQ_UDPV6) {
+               parser->tunnel =
+                       ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
+               parser->out_layer = parser->layer;
+       } else {
+               parser->tunnel =
+                       ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
+               /* parser->out_layer stays as in GRE out_layer. */
+       }
+       parser->layer = HASH_RXQ_TUNNEL;
+       /*
+        * For MPLS-in-GRE, RSS level should have been set.
+        * For MPLS-in-UDP, use outer RSS.
+        */
+       if (!parser->rss_conf.level)
+               parser->rss_conf.level = 1;
+       if (spec) {
+               if (!mask)
+                       mask = default_mask;
+               /*
+                * The verbs label field includes the entire MPLS header:
+                * bits 0:19 - label value field, bits 20:22 - traffic class,
+                * bits 23 - bottom of stack bit, bits 24:31 - ttl field.
+                * Copied bytewise: the item is not known to be 4-byte aligned
+                * and must not be read through a uint32_t pointer.
+                */
+               memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
+               memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
+               /* Remove unwanted bits from values. */
+               mpls.val.label &= mpls.mask.label;
+       }
+       mlx5_flow_create_copy(parser, &mpls, size);
+       return 0;
+#endif
+}
+
 /**
  * Convert mark/flag action to Verbs specification.
  *
@@ -1706,7 +2181,7 @@ mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
                parser->queue[HASH_RXQ_ETH].ibv_attr;
        if (parser->count)
                flow->cs = parser->cs;
-       if (!priv->dev->data->dev_started)
+       if (!dev->data->dev_started)
                return 0;
        parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        flow->frxq[HASH_RXQ_ETH].ibv_flow =
@@ -1758,35 +2233,36 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
                                  struct rte_flow *flow,
                                  struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
        for (i = 0; i != hash_rxq_init_n; ++i) {
-               uint64_t hash_fields;
-
                if (!parser->queue[i].ibv_attr)
                        continue;
                flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
                parser->queue[i].ibv_attr = NULL;
-               hash_fields = hash_rxq_init[i].hash_fields;
-               if (!priv->dev->data->dev_started)
+               flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
+               if (!dev->data->dev_started)
                        continue;
                flow->frxq[i].hrxq =
                        mlx5_hrxq_get(dev,
                                      parser->rss_conf.key,
                                      parser->rss_conf.key_len,
-                                     hash_fields,
+                                     flow->frxq[i].hash_fields,
                                      parser->rss_conf.queue,
-                                     parser->rss_conf.queue_num);
+                                     parser->rss_conf.queue_num,
+                                     parser->tunnel,
+                                     parser->rss_conf.level);
                if (flow->frxq[i].hrxq)
                        continue;
                flow->frxq[i].hrxq =
                        mlx5_hrxq_new(dev,
                                      parser->rss_conf.key,
                                      parser->rss_conf.key_len,
-                                     hash_fields,
+                                     flow->frxq[i].hash_fields,
                                      parser->rss_conf.queue,
-                                     parser->rss_conf.queue_num);
+                                     parser->rss_conf.queue_num,
+                                     parser->tunnel,
+                                     parser->rss_conf.level);
                if (!flow->frxq[i].hrxq) {
                        return rte_flow_error_set(error, ENOMEM,
                                                  RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1797,6 +2273,99 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
        return 0;
 }
 
+/**
+ * Propagate mark and tunnel type of a flow rule to its Rx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param flow
+ *   Pointer to the flow rule.
+ */
+static void
+mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct priv *priv = dev->data->dev_private;
+       unsigned int i;
+       unsigned int j;
+
+       if (!dev->data->dev_started)
+               return;
+       for (i = 0; i != flow->rss_conf.queue_num; ++i) {
+               struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
+                                                [(*flow->queues)[i]];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+               uint8_t tunnel = PTYPE_IDX(flow->tunnel);
+
+               rxq_data->mark |= flow->mark;
+               if (!tunnel)
+                       continue;
+               rxq_ctrl->tunnel_types[tunnel] += 1;
+               /* Clear tunnel type if more than one tunnel type is in use. */
+               for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
+                       if (j == tunnel)
+                               continue;
+                       if (rxq_ctrl->tunnel_types[j] > 0) {
+                               rxq_data->tunnel = 0;
+                               break;
+                       }
+               }
+               if (j == RTE_DIM(rxq_ctrl->tunnel_types))
+                       rxq_data->tunnel = flow->tunnel; /* Only type set. */
+       }
+}
+
+/**
+ * Log hash RX queue details of a flow at DEBUG level (no-op with NDEBUG).
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param flow
+ *   Pointer to the rte_flow.
+ * @param hrxq_idx
+ *   Hash RX queue index.
+ */
+static void
+mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
+              struct rte_flow *flow __rte_unused,
+              unsigned int hrxq_idx __rte_unused)
+{
+#ifndef NDEBUG
+       uintptr_t spec_ptr;
+       uint16_t j;
+       char buf[256] = ""; /* Remains a valid string when there is no spec. */
+       size_t off; /* Not uint8_t: must reach sizeof(buf) without wrapping. */
+
+       spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
+       for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs &&
+            off < sizeof(buf) - 1; j++) {
+               struct ibv_flow_spec *spec = (void *)spec_ptr;
+               off += snprintf(buf + off, sizeof(buf) - off, " %x(%hu)",
+                               spec->hdr.type, spec->hdr.size);
+               spec_ptr += spec->hdr.size;
+       }
+       DRV_LOG(DEBUG,
+               "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
+               " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
+               " flags:%x, comp_mask:%x specs:%s",
+               dev->data->port_id, (void *)flow, hrxq_idx,
+               (void *)flow->frxq[hrxq_idx].hrxq,
+               (void *)flow->frxq[hrxq_idx].hrxq->qp,
+               (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
+               flow->frxq[hrxq_idx].hash_fields |
+               (flow->tunnel &&
+                flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
+               flow->rss_conf.queue_num,
+               flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
+               flow->frxq[hrxq_idx].ibv_attr->size,
+               flow->frxq[hrxq_idx].ibv_attr->priority,
+               flow->frxq[hrxq_idx].ibv_attr->type,
+               flow->frxq[hrxq_idx].ibv_attr->flags,
+               flow->frxq[hrxq_idx].ibv_attr->comp_mask,
+               buf);
+#endif
+}
+
 /**
  * Complete flow rule creation.
  *
@@ -1818,7 +2387,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                              struct rte_flow *flow,
                              struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
+       struct priv *priv __rte_unused = dev->data->dev_private;
        int ret;
        unsigned int i;
        unsigned int flows_n = 0;
@@ -1831,7 +2400,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                goto error;
        if (parser->count)
                flow->cs = parser->cs;
-       if (!priv->dev->data->dev_started)
+       if (!dev->data->dev_started)
                return 0;
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!flow->frxq[i].hrxq)
@@ -1839,6 +2408,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                flow->frxq[i].ibv_flow =
                        mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
                                               flow->frxq[i].ibv_attr);
+               mlx5_flow_dump(dev, flow, i);
                if (!flow->frxq[i].ibv_flow) {
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1846,23 +2416,13 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                        goto error;
                }
                ++flows_n;
-               DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
-                       dev->data->port_id,
-                       (void *)flow, i,
-                       (void *)flow->frxq[i].hrxq,
-                       (void *)flow->frxq[i].ibv_flow);
        }
        if (!flows_n) {
                rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "internal error in flow creation");
                goto error;
        }
-       for (i = 0; i != parser->rss_conf.queue_num; ++i) {
-               struct mlx5_rxq_data *q =
-                       (*priv->rxqs)[parser->rss_conf.queue[i]];
-
-               q->mark |= parser->mark;
-       }
+       mlx5_flow_create_update_rxqs(dev, flow);
        return 0;
 error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
@@ -1935,8 +2495,10 @@ mlx5_flow_list_create(struct rte_eth_dev *dev,
        }
        /* Copy configuration. */
        flow->queues = (uint16_t (*)[])(flow + 1);
+       flow->tunnel = parser.tunnel;
        flow->rss_conf = (struct rte_flow_action_rss){
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+               .level = 0,
                .types = parser.rss_conf.types,
                .key_len = parser.rss_conf.key_len,
                .queue_num = parser.rss_conf.queue_num,
@@ -2025,9 +2587,38 @@ mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
-       if (flow->drop || !flow->mark)
+       if (flow->drop || !dev->data->dev_started)
                goto free;
-       for (i = 0; i != flow->rss_conf.queue_num; ++i) {
+       for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
+               /* Update queue tunnel type. */
+               struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
+                                                [(*flow->queues)[i]];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+               uint8_t tunnel = PTYPE_IDX(flow->tunnel);
+
+               assert(rxq_ctrl->tunnel_types[tunnel] > 0);
+               rxq_ctrl->tunnel_types[tunnel] -= 1;
+               if (!rxq_ctrl->tunnel_types[tunnel]) {
+                       /* Update tunnel type. */
+                       uint8_t j;
+                       uint8_t types = 0;
+                       uint8_t last;
+
+                       for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
+                               if (rxq_ctrl->tunnel_types[j]) {
+                                       types += 1;
+                                       last = j;
+                               }
+                       /* Keep as is if more than one tunnel type is left. */
+                       if (types == 1)
+                               rxq_data->tunnel = ptype_ext[last];
+                       else if (types == 0)
+                               /* No tunnel type left. */
+                               rxq_data->tunnel = 0;
+               }
+       }
+       for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
                struct rte_flow *tmp;
                int mark = 0;
 
@@ -2246,9 +2837,9 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 {
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
+       unsigned int i;
 
        TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
-               unsigned int i;
                struct mlx5_ind_table_ibv *ind_tbl = NULL;
 
                if (flow->drop) {
@@ -2294,6 +2885,18 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
                DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
                        (void *)flow);
        }
+       /* Cleanup Rx queue tunnel info. */
+       for (i = 0; i != priv->rxqs_n; ++i) {
+               struct mlx5_rxq_data *q = (*priv->rxqs)[i];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of(q, struct mlx5_rxq_ctrl, rxq);
+
+               if (!q)
+                       continue;
+               memset((void *)rxq_ctrl->tunnel_types, 0,
+                      sizeof(rxq_ctrl->tunnel_types));
+               q->tunnel = 0;
+       }
 }
 
 /**
@@ -2339,42 +2942,44 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
                        flow->frxq[i].hrxq =
                                mlx5_hrxq_get(dev, flow->rss_conf.key,
                                              flow->rss_conf.key_len,
-                                             hash_rxq_init[i].hash_fields,
+                                             flow->frxq[i].hash_fields,
                                              flow->rss_conf.queue,
-                                             flow->rss_conf.queue_num);
+                                             flow->rss_conf.queue_num,
+                                             flow->tunnel,
+                                             flow->rss_conf.level);
                        if (flow->frxq[i].hrxq)
                                goto flow_create;
                        flow->frxq[i].hrxq =
                                mlx5_hrxq_new(dev, flow->rss_conf.key,
                                              flow->rss_conf.key_len,
-                                             hash_rxq_init[i].hash_fields,
+                                             flow->frxq[i].hash_fields,
                                              flow->rss_conf.queue,
-                                             flow->rss_conf.queue_num);
+                                             flow->rss_conf.queue_num,
+                                             flow->tunnel,
+                                             flow->rss_conf.level);
                        if (!flow->frxq[i].hrxq) {
                                DRV_LOG(DEBUG,
-                                       "port %u flow %p cannot be applied",
+                                       "port %u flow %p cannot create hash"
+                                       " rxq",
                                        dev->data->port_id, (void *)flow);
                                rte_errno = EINVAL;
                                return -rte_errno;
                        }
 flow_create:
+                       mlx5_flow_dump(dev, flow, i);
                        flow->frxq[i].ibv_flow =
                                mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
                                                       flow->frxq[i].ibv_attr);
                        if (!flow->frxq[i].ibv_flow) {
                                DRV_LOG(DEBUG,
-                                       "port %u flow %p cannot be applied",
-                                       dev->data->port_id, (void *)flow);
+                                       "port %u flow %p type %u cannot be"
+                                       " applied",
+                                       dev->data->port_id, (void *)flow, i);
                                rte_errno = EINVAL;
                                return -rte_errno;
                        }
-                       DRV_LOG(DEBUG, "port %u flow %p applied",
-                               dev->data->port_id, (void *)flow);
                }
-               if (!flow->mark)
-                       continue;
-               for (i = 0; i != flow->rss_conf.queue_num; ++i)
-                       (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
+               mlx5_flow_create_update_rxqs(dev, flow);
        }
        return 0;
 }
@@ -2452,6 +3057,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
        uint16_t queue[priv->reta_idx_n];
        struct rte_flow_action_rss action_rss = {
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+               .level = 0,
                .types = priv->rss_conf.rss_hf,
                .key_len = priv->rss_conf.rss_key_len,
                .queue_num = priv->reta_idx_n,
@@ -2592,7 +3198,7 @@ mlx5_flow_query_count(struct ibv_counter_set *cs,
 int
 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
                struct rte_flow *flow,
-               enum rte_flow_action_type action __rte_unused,
+               const struct rte_flow_action *action __rte_unused,
                void *data,
                struct rte_flow_error *error)
 {
@@ -2637,9 +3243,9 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
        }
        priv->isolated = !!enable;
        if (enable)
-               priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+               dev->dev_ops = &mlx5_dev_ops_isolate;
        else
-               priv->dev->dev_ops = &mlx5_dev_ops;
+               dev->dev_ops = &mlx5_dev_ops;
        return 0;
 }
 
@@ -2938,11 +3544,14 @@ mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
                struct ibv_spec_header *flow_h;
                void *flow_spec;
                unsigned int specs_n;
+               unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
+                                                     parser.layer;
 
-               attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
-               flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+               attr = parser.queue[queue_id].ibv_attr;
+               flow_attr = flow->frxq[queue_id].ibv_attr;
                /* Compare first the attributes. */
-               if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+               if (!flow_attr ||
+                   memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
                        continue;
                if (attr->num_of_specs == 0)
                        continue;
@@ -3027,11 +3636,10 @@ mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
 static void
 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
 {
-       struct priv *priv = dev->data->dev_private;
        struct rte_eth_fdir_masks *mask =
-               &priv->dev->data->dev_conf.fdir_conf.mask;
+               &dev->data->dev_conf.fdir_conf.mask;
 
-       fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+       fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
        fdir_info->guarant_spc = 0;
        rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
        fdir_info->max_flexpayload = 0;
@@ -3059,9 +3667,8 @@ static int
 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
                    void *arg)
 {
-       struct priv *priv = dev->data->dev_private;
        enum rte_fdir_mode fdir_mode =
-               priv->dev->data->dev_conf.fdir_conf.mode;
+               dev->data->dev_conf.fdir_conf.mode;
 
        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
@@ -3133,3 +3740,56 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
        }
        return 0;
 }
+
+/**
+ * Detect number of Verbs flow priorities supported.
+ *
+ * The number is probed by creating, then immediately destroying, a dummy
+ * flow (one Ethernet spec plus a drop action) on the QP of
+ * priv->flow_drop_queue. Starting from MLX5_VERBS_FLOW_PRIO_8, the candidate
+ * count is doubled for as long as a flow with priority (count - 1) can be
+ * created; on the first failure the candidate is halved and returned.
+ * If the very first probe fails, the result is therefore
+ * MLX5_VERBS_FLOW_PRIO_8 / 2.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   Number of supported Verbs flow priorities.
+ */
+unsigned int
+mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
+       /* Attribute header immediately followed by its two specs. */
+       struct {
+               struct ibv_flow_attr attr;
+               struct ibv_flow_spec_eth eth;
+               struct ibv_flow_spec_action_drop drop;
+       } flow_attr = {
+               .attr = {
+                       .num_of_specs = 2,
+               },
+               .eth = {
+                       .type = IBV_FLOW_SPEC_ETH,
+                       .size = sizeof(struct ibv_flow_spec_eth),
+               },
+               .drop = {
+                       .size = sizeof(struct ibv_flow_spec_action_drop),
+                       .type = IBV_FLOW_SPEC_ACTION_DROP,
+               },
+       };
+       struct ibv_flow *flow;
+
+       do {
+               /* Probe the last priority of the candidate range. */
+               flow_attr.attr.priority = verb_priorities - 1;
+               flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
+                                             &flow_attr.attr);
+               if (flow) {
+                       /* The probe flow is not needed once created. */
+                       claim_zero(mlx5_glue->destroy_flow(flow));
+                       /* Try more priorities. */
+                       verb_priorities *= 2;
+               } else {
+                       /* Failed, restore last right number. */
+                       verb_priorities /= 2;
+                       break;
+               }
+       } while (1);
+       /* verb_priorities is unsigned, hence %u. */
+       DRV_LOG(DEBUG, "port %u Verbs flow priorities: %u,"
+               " user flow priorities: %d",
+               dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
+       return verb_priorities;
+}