net/mlx5: fix flow director deletion
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
index 23ca2b8..466d74b 100644 (file)
 #define MLX5_IPV4 4
 #define MLX5_IPV6 6
 
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT /* Counter set stand-ins. */
+struct ibv_counter_set_init_attr {
+       int dummy; /**< Placeholder member. */
+};
+struct ibv_flow_spec_counter_action {
+       int dummy; /**< Placeholder member. */
+};
+struct ibv_counter_set {
+       int dummy; /**< Placeholder member. */
+};
+
+static inline int
+ibv_destroy_counter_set(struct ibv_counter_set *cs)
+{
+       (void)cs; /* Argument is intentionally ignored by this stub. */
+       return -ENOTSUP; /* The stub always reports "not supported". */
+}
+#endif /* !HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT */
+
+/* Dev ops structure defined in mlx5.c */
+extern const struct eth_dev_ops mlx5_dev_ops;
+extern const struct eth_dev_ops mlx5_dev_ops_isolate;
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
@@ -103,6 +126,9 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
 static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
 
+static int
+mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
        HASH_RXQ_TCPV4,
@@ -186,6 +212,12 @@ const struct hash_rxq_init hash_rxq_init[] = {
 /* Number of entries in hash_rxq_init[]. */
 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
 
+/**
+ * Structure for holding counter stats.
+ *
+ * The flow query code subtracts these values from the raw counters it reads
+ * and overwrites them with the raw counters when a query requests a reset.
+ */
+struct mlx5_flow_counter_stats {
+       uint64_t hits; /**< Number of packets matched by the rule. */
+       uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
+
 /** Structure for Drop queue. */
 struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
@@ -216,6 +248,8 @@ struct rte_flow {
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
+       struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+       struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
        union {
                struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
                /**< Flow with Rx queue. */
@@ -271,6 +305,9 @@ static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       RTE_FLOW_ACTION_TYPE_COUNT,
+#endif
        RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -399,12 +436,14 @@ struct mlx5_flow_parse {
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
+       uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
        uint8_t rss_key[40]; /**< copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
+       struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
        union {
                struct {
                        struct ibv_flow_attr *ibv_attr;
@@ -426,43 +465,37 @@ static const struct rte_flow_ops mlx5_flow_ops = {
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       .query = mlx5_flow_query,
+#else
        .query = NULL,
+#endif
        .isolate = mlx5_flow_isolate,
 };
 
-/**
- * Manage filter operations.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param filter_type
- *   Filter type.
- * @param filter_op
- *   Operation to perform.
- * @param arg
- *   Pointer to operation-specific structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
-                    enum rte_filter_type filter_type,
-                    enum rte_filter_op filter_op,
-                    void *arg)
-{
-       int ret = EINVAL;
+/*
+ * Convert FDIR request to Generic flow.
+ *
+ * Holds the generic flow description (attributes, pattern items, actions)
+ * together with the storage the items and actions point to.
+ */
+struct mlx5_fdir {
+       struct rte_flow_attr attr; /**< Flow attributes. */
+       struct rte_flow_action actions[2]; /**< Flow actions. */
+       struct rte_flow_item items[4]; /**< Pattern items. */
+       struct rte_flow_item_eth l2; /**< Ethernet item spec. */
+       struct rte_flow_item_eth l2_mask; /**< Ethernet item mask. */
+       union {
+               struct rte_flow_item_ipv4 ipv4;
+               struct rte_flow_item_ipv6 ipv6;
+       } l3; /**< Layer 3 item spec. */
+       union {
+               struct rte_flow_item_udp udp;
+               struct rte_flow_item_tcp tcp;
+       } l4; /**< Layer 4 item spec. */
+       struct rte_flow_action_queue queue; /**< Queue action configuration. */
+};
 
-       if (filter_type == RTE_ETH_FILTER_GENERIC) {
-               if (filter_op != RTE_ETH_FILTER_GET)
-                       return -EINVAL;
-               *(const void **)arg = &mlx5_flow_ops;
-               return 0;
-       }
-       ERROR("%p: filter type (%d) not supported",
-             (void *)dev, filter_type);
-       return -ret;
-}
+/*
+ * Verbs specification header.
+ *
+ * NOTE(review): presumably mirrors the leading type/size fields shared by
+ * the ibv_flow_spec_* structures -- confirm against the Verbs headers.
+ */
+struct ibv_spec_header {
+       enum ibv_flow_spec_type type; /**< Specification type. */
+       uint16_t size; /**< Specification size. */
+};
 
 /**
  * Check support for a given item.
@@ -747,10 +780,15 @@ priv_flow_convert_actions(struct priv *priv,
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
+                          priv->counter_set_supported) {
+                       parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
+       if (parser->drop && parser->mark)
+               parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
@@ -828,7 +866,7 @@ priv_flow_convert_items_validate(struct priv *priv,
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
-                       parser->inner = 1;
+                       parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop) {
                        parser->drop_q.offset += cur_item->dst_sz;
@@ -844,6 +882,16 @@ priv_flow_convert_items_validate(struct priv *priv,
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
+       if (parser->count) {
+               unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+
+               if (parser->drop) {
+                       parser->drop_q.offset += size;
+               } else {
+                       for (i = 0; i != hash_rxq_init_n; ++i)
+                               parser->queue[i].offset += size;
+               }
+       }
        return 0;
 exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
@@ -962,15 +1010,13 @@ fill:
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
-                                       .type = IBV_FLOW_SPEC_IPV4_EXT |
-                                               parser->inner,
+                                       .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
-                                       .type = IBV_FLOW_SPEC_IPV6 |
-                                               parser->inner,
+                                       .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
@@ -990,8 +1036,7 @@ fill:
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
-                                        IBV_FLOW_SPEC_TCP) |
-                                       parser->inner,
+                                        IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
@@ -1110,7 +1155,7 @@ priv_flow_convert(struct priv *priv,
                                         cur_item->mask),
                                        parser);
                if (ret) {
-                       rte_flow_error_set(error, ENOTSUP,
+                       rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
@@ -1118,12 +1163,35 @@ priv_flow_convert(struct priv *priv,
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
+       if (parser->count && parser->create) {
+               mlx5_flow_create_count(priv, parser);
+               if (!parser->cs)
+                       goto exit_count_error;
+       }
        /*
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
-       if (parser->queues_n > 1)
+       if (parser->drop) {
+               /*
+                * The drop queue priority needs to be adjusted to
+                * the priority of its most specific layer.
+                */
+               parser->drop_q.ibv_attr->priority =
+                       attr->priority +
+                       hash_rxq_init[parser->layer].flow_priority;
+       } else if (parser->queues_n > 1) {
                priv_flow_convert_finalise(priv, parser);
+       } else {
+               /*
+                * Queue actions have their priority overridden with the
+                * Ethernet priority; it needs to be adjusted to the
+                * priority of their most specific layer.
+                */
+               parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
+                       attr->priority +
+                       hash_rxq_init[parser->layer].flow_priority;
+       }
 exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
@@ -1149,6 +1217,10 @@ exit_enomem:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
+exit_count_error:
+       rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                          NULL, "cannot create counter.");
+       return rte_errno;
 }
 
 /**
@@ -1217,7 +1289,9 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
                .size = eth_size,
        };
 
-       parser->layer = HASH_RXQ_ETH;
+       /* Don't update layer for the inner pattern. */
+       if (!parser->inner)
+               parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;
 
@@ -1312,7 +1386,9 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                .size = ipv4_size,
        };
 
-       parser->layer = HASH_RXQ_IPV4;
+       /* Don't update layer for the inner pattern. */
+       if (!parser->inner)
+               parser->layer = HASH_RXQ_IPV4;
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1362,7 +1438,9 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                .size = ipv6_size,
        };
 
-       parser->layer = HASH_RXQ_IPV6;
+       /* Don't update layer for the inner pattern. */
+       if (!parser->inner)
+               parser->layer = HASH_RXQ_IPV6;
        if (spec) {
                unsigned int i;
 
@@ -1416,10 +1494,13 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
                .size = udp_size,
        };
 
-       if (parser->layer == HASH_RXQ_IPV4)
-               parser->layer = HASH_RXQ_UDPV4;
-       else
-               parser->layer = HASH_RXQ_UDPV6;
+       /* Don't update layer for the inner pattern. */
+       if (!parser->inner) {
+               if (parser->layer == HASH_RXQ_IPV4)
+                       parser->layer = HASH_RXQ_UDPV4;
+               else
+                       parser->layer = HASH_RXQ_UDPV6;
+       }
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1459,10 +1540,13 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
                .size = tcp_size,
        };
 
-       if (parser->layer == HASH_RXQ_IPV4)
-               parser->layer = HASH_RXQ_TCPV4;
-       else
-               parser->layer = HASH_RXQ_TCPV6;
+       /* Don't update layer for the inner pattern. */
+       if (!parser->inner) {
+               if (parser->layer == HASH_RXQ_IPV4)
+                       parser->layer = HASH_RXQ_TCPV4;
+               else
+                       parser->layer = HASH_RXQ_TCPV6;
+       }
        if (spec) {
                if (!mask)
                        mask = default_mask;
@@ -1518,6 +1602,16 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                /* Remove unwanted bits from values. */
                vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
        }
+       /*
+        * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
+        * layer is defined in the Verbs specification, it is interpreted as a
+        * wildcard and all packets will match this rule; if it follows a full
+        * stack of layers (e.g. eth / ipv4 / udp), all packets matching the
+        * preceding layers will also match this rule.
+        * To avoid such a situation, VNI 0 is currently refused.
+        */
+       if (!vxlan.val.tunnel_id)
+               return EINVAL;
        mlx5_flow_create_copy(parser, &vxlan, size);
        return 0;
 }
@@ -1545,6 +1639,40 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
        return 0;
 }
 
+/**
+ * Convert count action to Verbs specification.
+ *
+ * When counter sets are supported at build time, a counter set is created,
+ * stored in the parser and a counter action specification referencing its
+ * handle is copied into the parser. Otherwise nothing is done.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param parser
+ *   Pointer to MLX5 flow parser structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+mlx5_flow_create_count(struct priv *priv __rte_unused,
+                      struct mlx5_flow_parse *parser __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       const unsigned int spec_size =
+               sizeof(struct ibv_flow_spec_counter_action);
+       struct ibv_counter_set_init_attr cs_attr = {
+               .counter_set_id = 0,
+       };
+       struct ibv_flow_spec_counter_action spec;
+
+       parser->cs = ibv_create_counter_set(priv->ctx, &cs_attr);
+       if (parser->cs == NULL)
+               return EINVAL;
+       /* The specification is built once the counter handle is known. */
+       spec = (struct ibv_flow_spec_counter_action){
+               .type = IBV_FLOW_SPEC_ACTION_COUNT,
+               .size = spec_size,
+               .counter_set_handle = parser->cs->handle,
+       };
+       mlx5_flow_create_copy(parser, &spec, spec_size);
+#endif
+       return 0;
+}
+
 /**
  * Complete flow rule creation with a drop queue.
  *
@@ -1581,9 +1709,11 @@ priv_flow_create_action_queue_drop(struct priv *priv,
        };
        ++parser->drop_q.ibv_attr->num_of_specs;
        parser->drop_q.offset += size;
+       flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
+       if (parser->count)
+               flow->cs = parser->cs;
        if (!priv->dev->data->dev_started)
                return 0;
-       flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
        parser->drop_q.ibv_attr = NULL;
        flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
                                              flow->drxq.ibv_attr);
@@ -1604,6 +1734,11 @@ error:
                rte_free(flow->drxq.ibv_attr);
                flow->drxq.ibv_attr = NULL;
        }
+       if (flow->cs) {
+               claim_zero(ibv_destroy_counter_set(flow->cs));
+               flow->cs = NULL;
+               parser->cs = NULL;
+       }
        return err;
 }
 
@@ -1638,13 +1773,15 @@ priv_flow_create_action_queue_rss(struct priv *priv,
                flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
                parser->queue[i].ibv_attr = NULL;
                hash_fields = hash_rxq_init[i].hash_fields;
+               if (!priv->dev->data->dev_started)
+                       continue;
                flow->frxq[i].hrxq =
                        mlx5_priv_hrxq_get(priv,
                                           parser->rss_conf.rss_key,
                                           parser->rss_conf.rss_key_len,
                                           hash_fields,
                                           parser->queues,
-                                          hash_fields ? parser->queues_n : 1);
+                                          parser->queues_n);
                if (flow->frxq[i].hrxq)
                        continue;
                flow->frxq[i].hrxq =
@@ -1653,7 +1790,7 @@ priv_flow_create_action_queue_rss(struct priv *priv,
                                           parser->rss_conf.rss_key_len,
                                           hash_fields,
                                           parser->queues,
-                                          hash_fields ? parser->queues_n : 1);
+                                          parser->queues_n);
                if (!flow->frxq[i].hrxq) {
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -1694,6 +1831,8 @@ priv_flow_create_action_queue(struct priv *priv,
        err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
        if (err)
                goto error;
+       if (parser->count)
+               flow->cs = parser->cs;
        if (!priv->dev->data->dev_started)
                return 0;
        for (i = 0; i != hash_rxq_init_n; ++i) {
@@ -1734,6 +1873,11 @@ error:
                if (flow->frxq[i].ibv_attr)
                        rte_free(flow->frxq[i].ibv_attr);
        }
+       if (flow->cs) {
+               claim_zero(ibv_destroy_counter_set(flow->cs));
+               flow->cs = NULL;
+               parser->cs = NULL;
+       }
        return err;
 }
 
@@ -1925,6 +2069,10 @@ free:
                                rte_free(frxq->ibv_attr);
                }
        }
+       if (flow->cs) {
+               claim_zero(ibv_destroy_counter_set(flow->cs));
+               flow->cs = NULL;
+       }
        TAILQ_REMOVE(list, flow, next);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
@@ -2347,6 +2495,86 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
        return 0;
 }
 
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+/**
+ * Query flow counter.
+ *
+ * @param cs
+ *   Counter set to read.
+ * @param counter_stats
+ *   Counter values subtracted from the raw counters; overwritten with the
+ *   raw counters when the query requests a reset.
+ * @param query_count
+ *   Query structure: its reset flag is read, its hits/bytes fields and
+ *   their "set" flags are written.
+ * @param error
+ *   Flow error structure filled when the counters cannot be read.
+ *
+ * @return
+ *   0 on success, the negated ibv_query_counter_set() result otherwise.
+ *   NOTE(review): the same negated value is passed to rte_flow_error_set();
+ *   confirm the sign convention of ibv_query_counter_set().
+ */
+static int
+priv_flow_query_count(struct ibv_counter_set *cs,
+                     struct mlx5_flow_counter_stats *counter_stats,
+                     struct rte_flow_query_count *query_count,
+                     struct rte_flow_error *error)
+{
+       uint64_t raw[2];
+       struct ibv_query_counter_set_attr attr = {
+               .cs = cs,
+               .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+       };
+       struct ibv_counter_set_data out = {
+               .out = raw,
+               .outlen = sizeof(raw),
+       };
+       const int ret = ibv_query_counter_set(&attr, &out);
+
+       if (ret != 0) {
+               rte_flow_error_set(error, -ret,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL,
+                                  "cannot read counter");
+               return -ret;
+       }
+       /* raw[0] is reported as hits, raw[1] as bytes. */
+       query_count->hits_set = 1;
+       query_count->bytes_set = 1;
+       query_count->hits = raw[0] - counter_stats->hits;
+       query_count->bytes = raw[1] - counter_stats->bytes;
+       if (!query_count->reset)
+               return 0;
+       /* Reset requested: later queries are relative to these values. */
+       counter_stats->hits = raw[0];
+       counter_stats->bytes = raw[1];
+       return 0;
+}
+
+/**
+ * Query a flow.
+ *
+ * Only the flow counter is queried; the action argument is ignored and
+ * data is handed to the counter query as a struct rte_flow_query_count
+ * pointer. The private structure lock is held for the duration of the query.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+               struct rte_flow *flow,
+               enum rte_flow_action_type action __rte_unused,
+               void *data,
+               struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct rte_flow_query_count *query = data;
+       int res;
+
+       priv_lock(priv);
+       if (!flow->cs) {
+               /* The flow was created without a counter. */
+               res = EINVAL;
+               rte_flow_error_set(error, res,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                  NULL,
+                                  "no counter found for flow");
+       } else {
+               res = priv_flow_query_count(flow->cs,
+                                           &flow->counter_stats,
+                                           query,
+                                           error);
+       }
+       priv_unlock(priv);
+       return -res;
+}
+#endif
+
 /**
  * Isolated mode.
  *
@@ -2370,6 +2598,500 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
                return -rte_errno;
        }
        priv->isolated = !!enable;
+       if (enable)
+               priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+       else
+               priv->dev->dev_ops = &mlx5_dev_ops;
        priv_unlock(priv);
        return 0;
 }
+
+/**
+ * Convert a flow director filter to a generic flow.
+ *
+ * Fills attributes with an ingress rule made of an Ethernet item (items[0]),
+ * a layer 3 item (items[1]) and, for UDP/TCP flow types, a layer 4 item
+ * (items[2]), plus a single QUEUE or DROP action (actions[0]). Entries not
+ * written here are left untouched.
+ * NOTE(review): callers presumably pass a zero-initialized structure so that
+ * the untouched trailing entries terminate both lists -- confirm.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Flow director filter to add.
+ * @param attributes
+ *   Generic flow parameters structure.
+ *
+ * @return
+ *  0 on success, errno value on error (EINVAL for an out of range Rx queue,
+ *  ENOTSUP for an unsupported behavior or flow type).
+ */
+static int
+priv_fdir_filter_convert(struct priv *priv,
+                        const struct rte_eth_fdir_filter *fdir_filter,
+                        struct mlx5_fdir *attributes)
+{
+       const struct rte_eth_fdir_input *input = &fdir_filter->input;
+
+       /* Validate queue number. */
+       if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
+               ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
+               return EINVAL;
+       }
+       attributes->attr.ingress = 1;
+       /* Every converted rule starts with an Ethernet item. */
+       attributes->items[0] = (struct rte_flow_item) {
+               .type = RTE_FLOW_ITEM_TYPE_ETH,
+               .spec = &attributes->l2,
+               .mask = &attributes->l2_mask,
+       };
+       switch (fdir_filter->action.behavior) {
+       case RTE_ETH_FDIR_ACCEPT:
+               attributes->actions[0] = (struct rte_flow_action){
+                       .type = RTE_FLOW_ACTION_TYPE_QUEUE,
+                       .conf = &attributes->queue,
+               };
+               break;
+       case RTE_ETH_FDIR_REJECT:
+               attributes->actions[0] = (struct rte_flow_action){
+                       .type = RTE_FLOW_ACTION_TYPE_DROP,
+               };
+               break;
+       default:
+               ERROR("invalid behavior %d", fdir_filter->action.behavior);
+               return ENOTSUP;
+       }
+       attributes->queue.index = fdir_filter->action.rx_queue;
+       switch (input->flow_type) {
+       case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+               attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+                       .src_addr = input->flow.udp4_flow.ip.src_ip,
+                       .dst_addr = input->flow.udp4_flow.ip.dst_ip,
+                       .time_to_live = input->flow.udp4_flow.ip.ttl,
+                       .type_of_service = input->flow.udp4_flow.ip.tos,
+                       .next_proto_id = input->flow.udp4_flow.ip.proto,
+               };
+               attributes->l4.udp.hdr = (struct udp_hdr){
+                       .src_port = input->flow.udp4_flow.src_port,
+                       .dst_port = input->flow.udp4_flow.dst_port,
+               };
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV4,
+                       .spec = &attributes->l3,
+               };
+               attributes->items[2] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_UDP,
+                       .spec = &attributes->l4,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+               attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+                       .src_addr = input->flow.tcp4_flow.ip.src_ip,
+                       .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
+                       .time_to_live = input->flow.tcp4_flow.ip.ttl,
+                       .type_of_service = input->flow.tcp4_flow.ip.tos,
+                       .next_proto_id = input->flow.tcp4_flow.ip.proto,
+               };
+               attributes->l4.tcp.hdr = (struct tcp_hdr){
+                       .src_port = input->flow.tcp4_flow.src_port,
+                       .dst_port = input->flow.tcp4_flow.dst_port,
+               };
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV4,
+                       .spec = &attributes->l3,
+               };
+               attributes->items[2] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_TCP,
+                       .spec = &attributes->l4,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+               attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+                       .src_addr = input->flow.ip4_flow.src_ip,
+                       .dst_addr = input->flow.ip4_flow.dst_ip,
+                       .time_to_live = input->flow.ip4_flow.ttl,
+                       .type_of_service = input->flow.ip4_flow.tos,
+                       .next_proto_id = input->flow.ip4_flow.proto,
+               };
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV4,
+                       .spec = &attributes->l3,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+                       .hop_limits = input->flow.udp6_flow.ip.hop_limits,
+                       .proto = input->flow.udp6_flow.ip.proto,
+               };
+               /* Each copy is sized by the field it writes to. */
+               memcpy(attributes->l3.ipv6.hdr.src_addr,
+                      input->flow.udp6_flow.ip.src_ip,
+                      sizeof(attributes->l3.ipv6.hdr.src_addr));
+               memcpy(attributes->l3.ipv6.hdr.dst_addr,
+                      input->flow.udp6_flow.ip.dst_ip,
+                      sizeof(attributes->l3.ipv6.hdr.dst_addr));
+               attributes->l4.udp.hdr = (struct udp_hdr){
+                       .src_port = input->flow.udp6_flow.src_port,
+                       .dst_port = input->flow.udp6_flow.dst_port,
+               };
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
+                       .spec = &attributes->l3,
+               };
+               attributes->items[2] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_UDP,
+                       .spec = &attributes->l4,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+                       .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
+                       .proto = input->flow.tcp6_flow.ip.proto,
+               };
+               memcpy(attributes->l3.ipv6.hdr.src_addr,
+                      input->flow.tcp6_flow.ip.src_ip,
+                      sizeof(attributes->l3.ipv6.hdr.src_addr));
+               memcpy(attributes->l3.ipv6.hdr.dst_addr,
+                      input->flow.tcp6_flow.ip.dst_ip,
+                      sizeof(attributes->l3.ipv6.hdr.dst_addr));
+               attributes->l4.tcp.hdr = (struct tcp_hdr){
+                       .src_port = input->flow.tcp6_flow.src_port,
+                       .dst_port = input->flow.tcp6_flow.dst_port,
+               };
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
+                       .spec = &attributes->l3,
+               };
+               attributes->items[2] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_TCP,
+                       .spec = &attributes->l4,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+                       .hop_limits = input->flow.ipv6_flow.hop_limits,
+                       .proto = input->flow.ipv6_flow.proto,
+               };
+               memcpy(attributes->l3.ipv6.hdr.src_addr,
+                      input->flow.ipv6_flow.src_ip,
+                      sizeof(attributes->l3.ipv6.hdr.src_addr));
+               memcpy(attributes->l3.ipv6.hdr.dst_addr,
+                      input->flow.ipv6_flow.dst_ip,
+                      sizeof(attributes->l3.ipv6.hdr.dst_addr));
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
+                       .spec = &attributes->l3,
+               };
+               break;
+       default:
+               ERROR("invalid flow type %d", input->flow_type);
+               return ENOTSUP;
+       }
+       return 0;
+}
+
+/**
+ * Add new flow director filter and store it in list.
+ *
+ * The filter is converted to a generic flow, checked with the flow parser
+ * and then created in the flow list of the private structure.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Flow director filter to add.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ *   NOTE(review): conversion failures are returned negated while a creation
+ *   failure returns ENOTSUP as is -- confirm the sign callers expect.
+ */
+static int
+priv_fdir_filter_add(struct priv *priv,
+                    const struct rte_eth_fdir_filter *fdir_filter)
+{
+       struct mlx5_fdir attributes = {
+               .attr.group = 0,
+               .l2_mask = {
+                       .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+                       .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+                       .type = 0,
+               },
+       };
+       struct mlx5_flow_parse parser = {
+               .layer = HASH_RXQ_ETH,
+       };
+       struct rte_flow_error error;
+       struct rte_flow *flow;
+       int ret;
+
+       /* The parser check only runs when the conversion succeeded. */
+       ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+       if (!ret)
+               ret = priv_flow_convert(priv, &attributes.attr,
+                                       attributes.items, attributes.actions,
+                                       &error, &parser);
+       if (ret)
+               return -ret;
+       flow = priv_flow_create(priv,
+                               &priv->flows,
+                               &attributes.attr,
+                               attributes.items,
+                               attributes.actions,
+                               &error);
+       if (!flow)
+               return ENOTSUP;
+       DEBUG("FDIR created %p", (void *)flow);
+       return 0;
+}
+
+/**
+ * Delete specific filter.
+ *
+ * The filter is converted to Verbs flow attributes and specifications the
+ * same way it would be for creation, then the first flow of priv->flows
+ * whose attributes and specifications compare equal is destroyed.  When no
+ * flow compares equal nothing is destroyed and 0 is still returned.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be deleted.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_delete(struct priv *priv,
+                       const struct rte_eth_fdir_filter *fdir_filter)
+{
+       struct mlx5_fdir attributes = {
+               .attr.group = 0,
+       };
+       struct mlx5_flow_parse parser = {
+               .create = 1,
+               .layer = HASH_RXQ_ETH,
+       };
+       struct rte_flow_error error;
+       struct rte_flow *flow;
+       struct ibv_flow_attr *attr;
+       unsigned int i;
+       int ret;
+
+       /*
+        * The visible failure path of priv_fdir_filter_convert() returns a
+        * positive errno (ENOTSUP), which is the convention of this
+        * function as well: forward it without changing its sign.
+        */
+       ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+       if (ret)
+               return ret;
+       ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+                               attributes.actions, &error, &parser);
+       if (ret)
+               goto exit;
+       /*
+        * Special case for drop action which is only set in the
+        * specifications when the flow is created.  In this situation the
+        * drop specification is missing.
+        */
+       if (parser.drop) {
+               struct ibv_flow_spec_action_drop *drop;
+
+               drop = (void *)((uintptr_t)parser.drop_q.ibv_attr +
+                               parser.drop_q.offset);
+               *drop = (struct ibv_flow_spec_action_drop){
+                       .type = IBV_FLOW_SPEC_ACTION_DROP,
+                       .size = sizeof(struct ibv_flow_spec_action_drop),
+               };
+               parser.drop_q.ibv_attr->num_of_specs++;
+       }
+       /* The reference attributes do not depend on the inspected flow. */
+       if (parser.drop)
+               attr = parser.drop_q.ibv_attr;
+       else
+               attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
+       /* Without reference specifications no flow can be matched. */
+       if (!attr || attr->num_of_specs == 0)
+               goto exit;
+       TAILQ_FOREACH(flow, &priv->flows, next) {
+               struct ibv_spec_header *attr_h;
+               void *spec;
+               struct ibv_flow_attr *flow_attr;
+               struct ibv_spec_header *flow_h;
+               void *flow_spec;
+               unsigned int specs_n;
+
+               if (flow->drop)
+                       flow_attr = flow->drxq.ibv_attr;
+               else
+                       flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+               /* A flow without attributes in this slot cannot match. */
+               if (!flow_attr)
+                       continue;
+               /* Compare first the attributes. */
+               if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+                       continue;
+               /* Specifications are stored right after the attributes. */
+               spec = (void *)((uintptr_t)attr +
+                               sizeof(struct ibv_flow_attr));
+               flow_spec = (void *)((uintptr_t)flow_attr +
+                                    sizeof(struct ibv_flow_attr));
+               specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
+               for (i = 0; i != specs_n; ++i) {
+                       attr_h = spec;
+                       flow_h = flow_spec;
+                       if (memcmp(spec, flow_spec,
+                                  RTE_MIN(attr_h->size, flow_h->size)))
+                               break;
+                       /* Each side advances by its own specification size. */
+                       spec = (void *)((uintptr_t)spec + attr_h->size);
+                       flow_spec = (void *)((uintptr_t)flow_spec +
+                                            flow_h->size);
+               }
+               /* All specifications compared equal, the flow matches. */
+               if (i == specs_n)
+                       break;
+       }
+       /* flow is NULL when the list was walked without any match. */
+       if (flow)
+               priv_flow_destroy(priv, &priv->flows, flow);
+exit:
+       if (parser.drop) {
+               rte_free(parser.drop_q.ibv_attr);
+       } else {
+               for (i = 0; i != hash_rxq_init_n; ++i) {
+                       if (parser.queue[i].ibv_attr)
+                               rte_free(parser.queue[i].ibv_attr);
+               }
+       }
+       /*
+        * NOTE(review): ret is either 0 or the priv_flow_convert() status;
+        * the negation assumes that function reports failures as negative
+        * errno values -- confirm against its definition.
+        */
+       return -ret;
+}
+
+/**
+ * Update queue for specific filter.
+ *
+ * Implemented as a deletion of the filter followed by its addition; the
+ * addition is only attempted when the deletion reported success.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_filter
+ *   Filter to be updated.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_update(struct priv *priv,
+                       const struct rte_eth_fdir_filter *fdir_filter)
+{
+       int status = priv_fdir_filter_delete(priv, fdir_filter);
+
+       if (status != 0)
+               return status;
+       return priv_fdir_filter_add(priv, fdir_filter);
+}
+
+/**
+ * Flush all filters.
+ *
+ * Hands the whole priv->flows list to priv_flow_flush().
+ *
+ * @param priv
+ *   Private structure.
+ */
+static void
+priv_fdir_filter_flush(struct priv *priv)
+{
+       priv_flow_flush(priv, &priv->flows);
+}
+
+/**
+ * Get flow director information.
+ *
+ * Mode and masks are copied from the device configuration, the remaining
+ * fields written here are all set to zero.
+ *
+ * @param priv
+ *   Private structure.
+ * @param[out] fdir_info
+ *   Resulting flow director information.
+ */
+static void
+priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+{
+       struct rte_eth_fdir_masks *configured_mask =
+               &priv->dev->data->dev_conf.fdir_conf.mask;
+
+       /* Fields mirrored from the device configuration. */
+       fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+       rte_memcpy(&fdir_info->mask, configured_mask,
+                  sizeof(fdir_info->mask));
+       /* Fields reported as zero. */
+       memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
+       fdir_info->guarant_spc = 0;
+       fdir_info->flow_types_mask[0] = 0;
+       fdir_info->max_flexpayload = 0;
+       fdir_info->flex_payload_unit = 0;
+       fdir_info->flex_payload_limit = 0;
+       fdir_info->max_flex_payload_segment_num = 0;
+}
+
+/**
+ * Deal with flow director operations.
+ *
+ * RTE_ETH_FILTER_NOP always succeeds.  Any other operation is refused with
+ * EINVAL unless the configured flow director mode is one of the two
+ * perfect modes, and is then dispatched to the matching helper.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+{
+       enum rte_fdir_mode mode = priv->dev->data->dev_conf.fdir_conf.mode;
+
+       if (filter_op == RTE_ETH_FILTER_NOP)
+               return 0;
+       if (!(mode == RTE_FDIR_MODE_PERFECT ||
+             mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN)) {
+               ERROR("%p: flow director mode %d not supported",
+                     (void *)priv, mode);
+               return EINVAL;
+       }
+       switch (filter_op) {
+       case RTE_ETH_FILTER_ADD:
+               return priv_fdir_filter_add(priv, arg);
+       case RTE_ETH_FILTER_UPDATE:
+               return priv_fdir_filter_update(priv, arg);
+       case RTE_ETH_FILTER_DELETE:
+               return priv_fdir_filter_delete(priv, arg);
+       case RTE_ETH_FILTER_FLUSH:
+               /* Flushing reports no status of its own. */
+               priv_fdir_filter_flush(priv);
+               return 0;
+       case RTE_ETH_FILTER_INFO:
+               /* Neither does the information query. */
+               priv_fdir_info_get(priv, arg);
+               return 0;
+       default:
+               break;
+       }
+       DEBUG("%p: unknown operation %u", (void *)priv,
+             filter_op);
+       return EINVAL;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * Generic filters only support RTE_ETH_FILTER_GET, which stores the address
+ * of mlx5_flow_ops through arg.  Flow director requests are forwarded to
+ * priv_fdir_ctrl_func() between priv_lock() and priv_unlock(), and the
+ * status it returns is negated.  Any other filter type is rejected.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+                    enum rte_filter_type filter_type,
+                    enum rte_filter_op filter_op,
+                    void *arg)
+{
+       struct priv *priv = dev->data->dev_private;
+       int status;
+
+       if (filter_type == RTE_ETH_FILTER_GENERIC) {
+               if (filter_op != RTE_ETH_FILTER_GET)
+                       return -EINVAL;
+               *(const void **)arg = &mlx5_flow_ops;
+               return 0;
+       }
+       if (filter_type == RTE_ETH_FILTER_FDIR) {
+               priv_lock(priv);
+               status = priv_fdir_ctrl_func(priv, filter_op, arg);
+               priv_unlock(priv);
+               return -status;
+       }
+       ERROR("%p: filter type (%d) not supported",
+             (void *)dev, filter_type);
+       return -EINVAL;
+}