net/mlx5: fix packet type offload for tunnels
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
index 146f1b8..923fc28 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <sys/queue.h>
+#include <stdalign.h>
 #include <stdint.h>
 #include <string.h>
 
@@ -88,6 +89,7 @@ extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 /* Modify a packet. */
 #define MLX5_FLOW_MOD_FLAG (1u << 0)
 #define MLX5_FLOW_MOD_MARK (1u << 1)
+#define MLX5_FLOW_MOD_COUNT (1u << 2)
 
 /* possible L3 layers protocols filtering. */
 #define MLX5_IP_PROTOCOL_TCP 6
@@ -249,6 +251,17 @@ struct mlx5_flow_verbs {
        uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
 };
 
+/*
+ * Counters information.
+ *
+ * Entries are linked together through @p next. Shared entries are looked up
+ * by @p id and reference-counted through @p ref_cnt. @p hits and @p bytes
+ * keep the raw readings saved by the last query that requested a reset; query
+ * results are reported relative to them.
+ */
+struct mlx5_flow_counter {
+       LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
+       uint32_t shared:1; /**< Share counter ID with other flow rules. */
+       uint32_t ref_cnt:31; /**< Reference counter. */
+       uint32_t id; /**< Counter ID. */
+       struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+       uint64_t hits; /**< Packets baseline saved on the last query reset. */
+       uint64_t bytes; /**< Bytes baseline saved on the last query reset. */
+};
+
 /* Flow structure. */
 struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
@@ -264,9 +277,11 @@ struct rte_flow {
        LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
        struct mlx5_flow_verbs *cur_verbs;
        /**< Current Verbs flow structure being filled. */
+       struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
        struct rte_flow_action_rss rss;/**< RSS context. */
        uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
        uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
+       void *nl_flow; /**< Netlink flow buffer if relevant. */
 };
 
 static const struct rte_flow_ops mlx5_flow_ops = {
@@ -275,6 +290,7 @@ static const struct rte_flow_ops mlx5_flow_ops = {
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
+       .query = mlx5_flow_query,
 };
 
 /* Convert FDIR request to Generic flow. */
@@ -454,6 +470,80 @@ mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
        flow->cur_verbs->attr->priority = priority;
 }
 
+/**
+ * Get a flow counter.
+ *
+ * A shared counter already registered on the device with the same @p id is
+ * reused and its reference count is incremented. Otherwise a new counter set
+ * is created, linked to the device counters list and returned holding a
+ * single reference owned by the caller. Each successful call must be
+ * balanced by one mlx5_flow_counter_release().
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   A pointer to the counter, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_counter *cnt;
+
+       /* Only shared counters may be reused by another flow rule. */
+       LIST_FOREACH(cnt, &priv->flow_counters, next) {
+               if (!cnt->shared || cnt->shared != shared)
+                       continue;
+               if (cnt->id != id)
+                       continue;
+               cnt->ref_cnt++;
+               return cnt;
+       }
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+
+       struct mlx5_flow_counter tmpl = {
+               .shared = shared,
+               /*
+                * The caller holds the first reference. Starting from zero
+                * would make the release path wrap the counter around (no
+                * sharing) or free it while still referenced (sharing).
+                */
+               .ref_cnt = 1,
+               .id = id,
+               .cs = mlx5_glue->create_counter_set
+                       (priv->ctx,
+                        &(struct ibv_counter_set_init_attr){
+                                .counter_set_id = id,
+                        }),
+               .hits = 0,
+               .bytes = 0,
+       };
+
+       if (!tmpl.cs) {
+               rte_errno = errno;
+               return NULL;
+       }
+       cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+       if (!cnt) {
+               /* Do not leak the counter set created above. */
+               claim_zero(mlx5_glue->destroy_counter_set(tmpl.cs));
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+       *cnt = tmpl;
+       LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+       return cnt;
+#endif
+       rte_errno = ENOTSUP;
+       return NULL;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * One reference is dropped. Once none remains, the underlying counter set is
+ * destroyed and the counter is unlinked from its list and freed.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
+{
+       counter->ref_cnt--;
+       /* Still referenced by another flow rule, keep it alive. */
+       if (counter->ref_cnt != 0)
+               return;
+       claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
+       LIST_REMOVE(counter, next);
+       rte_free(counter);
+}
+
 /**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
@@ -2128,6 +2218,70 @@ mlx5_flow_action_mark(const struct rte_flow_action *action,
        return size;
 }
 
+/**
+ * Convert the @p action into a Verbs specification after ensuring the NIC
+ * will understand and process it correctly.
+ * If the necessary size for the conversion is greater than the @p flow_size,
+ * nothing is written in @p flow, the validation is still performed.
+ *
+ * The device configuration is checked before anything else so that no
+ * counter is created when flow counters are disabled.
+ *
+ * @param dev[in]
+ *   Pointer to Ethernet device.
+ * @param action[in]
+ *   Action configuration.
+ * @param flow[in, out]
+ *   Pointer to flow structure.
+ * @param flow_size[in]
+ *   Size in bytes of the available space in @p flow, if too small, nothing is
+ *   written.
+ * @param error[in, out]
+ *   Pointer to error structure.
+ *
+ * @return
+ *   On success the number of bytes consumed/necessary, if the returned value
+ *   is lesser or equal to @p flow_size, the @p action has fully been
+ *   converted, otherwise another call with this returned memory size should
+ *   be done.
+ *   On error, a negative errno value is returned and rte_errno is set.
+ */
+static int
+mlx5_flow_action_count(struct rte_eth_dev *dev,
+                      const struct rte_flow_action *action,
+                      struct rte_flow *flow,
+                      const size_t flow_size __rte_unused,
+                      struct rte_flow_error *error)
+{
+       const struct rte_flow_action_count *count = action->conf;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+       struct ibv_flow_spec_counter_action counter = {
+               .type = IBV_FLOW_SPEC_ACTION_COUNT,
+               .size = size,
+       };
+#endif
+
+       /* Reject the action before allocating any counter resource. */
+       if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         action,
+                                         "flow counters are not supported.");
+       /* The counter may already be attached to this flow, reuse it. */
+       if (!flow->counter) {
+               flow->counter = mlx5_flow_counter_new(dev, count->shared,
+                                                     count->id);
+               if (!flow->counter)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 action,
+                                                 "cannot get counter"
+                                                 " context.");
+       }
+       flow->modifier |= MLX5_FLOW_MOD_COUNT;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       counter.counter_set_handle = flow->counter->cs->handle;
+       /* Only write the specification when the buffer can hold it. */
+       if (size <= flow_size)
+               mlx5_flow_spec_verbs_add(flow, &counter, size);
+       return size;
+#endif
+       return 0;
+}
+
 /**
  * Convert the @p action into @p flow after ensuring the NIC will understand
  * and process it correctly.
@@ -2187,6 +2341,10 @@ mlx5_flow_actions(struct rte_eth_dev *dev,
                case RTE_FLOW_ACTION_TYPE_RSS:
                        ret = mlx5_flow_action_rss(dev, actions, flow, error);
                        break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = mlx5_flow_action_count(dev, actions, flow, remain,
+                                                    error);
+                       break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -2209,6 +2367,103 @@ mlx5_flow_actions(struct rte_eth_dev *dev,
        return size;
 }
 
+/**
+ * Validate flow rule and fill flow structure accordingly.
+ *
+ * A port ID to network interface index table is built from the ports
+ * reported by mlx5_dev_to_port_id(), with the current device first and a
+ * zero entry last, then handed to mlx5_nl_flow_transpose() along with the
+ * space located past the aligned flow structure.
+ *
+ * When @p flow_size cannot even hold the aligned flow structure, it is
+ * treated as zero: the conversion is invoked with a zero-sized buffer and
+ * @p flow itself is not written.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[out] flow
+ *   Pointer to flow structure.
+ * @param flow_size
+ *   Size of allocated space for @p flow.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A positive value representing the size of the flow object in bytes
+ *   regardless of @p flow_size on success (aligned flow structure size plus
+ *   the value returned by mlx5_nl_flow_transpose()), a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_merge_switch(struct rte_eth_dev *dev,
+                      struct rte_flow *flow,
+                      size_t flow_size,
+                      const struct rte_flow_attr *attr,
+                      const struct rte_flow_item pattern[],
+                      const struct rte_flow_action actions[],
+                      struct rte_flow_error *error)
+{
+       unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
+       uint16_t port_id[!n + n]; /* One entry at least, even when n is 0. */
+       struct mlx5_nl_flow_ptoi ptoi[!n + n + 1]; /* Extra terminator slot. */
+       size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
+       unsigned int i;
+       unsigned int own = 0;
+       int ret;
+
+       /* At least one port is needed when no switch domain is present. */
+       if (!n) {
+               n = 1;
+               port_id[0] = dev->data->port_id;
+       } else {
+               n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
+       }
+       for (i = 0; i != n; ++i) {
+               struct rte_eth_dev_info dev_info;
+
+               rte_eth_dev_info_get(port_id[i], &dev_info);
+               if (port_id[i] == dev->data->port_id)
+                       own = i;
+               ptoi[i].port_id = port_id[i];
+               ptoi[i].ifindex = dev_info.if_index;
+       }
+       /*
+        * Ensure first entry of ptoi[] is the current device. The spare
+        * terminator slot ptoi[n] is used as scratch space for the swap and
+        * is overwritten right after.
+        */
+       if (own) {
+               ptoi[n] = ptoi[0];
+               ptoi[0] = ptoi[own];
+               ptoi[own] = ptoi[n];
+       }
+       /* An entry with zero ifindex terminates ptoi[]. */
+       ptoi[n].port_id = 0;
+       ptoi[n].ifindex = 0;
+       if (flow_size < off)
+               flow_size = 0;
+       ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
+                                    flow_size ? flow_size - off : 0,
+                                    ptoi, attr, pattern, actions, error);
+       if (ret < 0)
+               return ret;
+       if (flow_size) {
+               *flow = (struct rte_flow){
+                       .attributes = *attr,
+                       .nl_flow = (uint8_t *)flow + off,
+               };
+               /*
+                * Generate a reasonably unique handle based on the address
+                * of the target buffer.
+                *
+                * This is straightforward on 32-bit systems where the flow
+                * pointer can be used directly. Otherwise, its least
+                * significant part is taken after shifting it by the
+                * previous power of two of the pointed buffer size.
+                */
+               if (sizeof(flow) <= 4)
+                       mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
+               else
+                       mlx5_nl_flow_brand
+                               (flow->nl_flow,
+                                (uintptr_t)flow >>
+                                rte_log2_u32(rte_align32prevpow2(flow_size)));
+       }
+       return off + ret;
+}
+
 /**
  * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC
  * after ensuring the NIC will understand and process it correctly.
@@ -2263,6 +2518,10 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
        int ret;
        uint32_t i;
 
+       if (attributes->transfer)
+               return mlx5_flow_merge_switch(dev, flow, flow_size,
+                                             attributes, pattern,
+                                             actions, error);
        if (size > flow_size)
                flow = &local_flow;
        ret = mlx5_flow_attributes(dev, attributes, flow, error);
@@ -2503,22 +2762,20 @@ mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
-       unsigned int idx;
 
-       for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) {
+       for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                unsigned int j;
 
-               if (!(*priv->rxqs)[idx])
+               if (!(*priv->rxqs)[i])
                        continue;
-               rxq_ctrl = container_of((*priv->rxqs)[idx],
+               rxq_ctrl = container_of((*priv->rxqs)[i],
                                        struct mlx5_rxq_ctrl, rxq);
                rxq_ctrl->flow_mark_n = 0;
                rxq_ctrl->rxq.mark = 0;
                for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
                        rxq_ctrl->flow_tunnels_n[j] = 0;
                rxq_ctrl->rxq.tunnel = 0;
-               ++idx;
        }
 }
 
@@ -2553,8 +2810,11 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 static void
 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_flow_verbs *verbs;
 
+       if (flow->nl_flow && priv->mnl_socket)
+               mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
        LIST_FOREACH(verbs, &flow->verbs, next) {
                if (verbs->flow) {
                        claim_zero(mlx5_glue->destroy_flow(verbs->flow));
@@ -2568,6 +2828,10 @@ mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
                        verbs->hrxq = NULL;
                }
        }
+       if (flow->counter) {
+               mlx5_flow_counter_release(flow->counter);
+               flow->counter = NULL;
+       }
 }
 
 /**
@@ -2587,6 +2851,7 @@ static int
 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                struct rte_flow_error *error)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_flow_verbs *verbs;
        int err;
 
@@ -2614,7 +2879,9 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                                     MLX5_RSS_HASH_KEY_LEN,
                                                     verbs->hash_fields,
                                                     (*flow->queue),
-                                                    flow->rss.queue_num);
+                                                    flow->rss.queue_num,
+                                                    !!(flow->layers &
+                                                     MLX5_FLOW_LAYER_TUNNEL));
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, rte_errno,
@@ -2635,6 +2902,10 @@ mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                        goto error;
                }
        }
+       if (flow->nl_flow &&
+           priv->mnl_socket &&
+           mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
+               goto error;
        return 0;
 error:
        err = rte_errno; /* Save rte_errno before cleanup. */
@@ -3017,6 +3288,96 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
        return 0;
 }
 
+/**
+ * Query flow counter.
+ *
+ * Values are reported relative to the baseline kept in the flow counter;
+ * that baseline is moved to the current reading when a reset is requested.
+ *
+ * @param flow
+ *   Pointer to the flow.
+ * @param data
+ *   Pointer to a struct rte_flow_query_count, read for the reset request and
+ *   filled with the result.
+ * @param error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
+                     void *data __rte_unused,
+                     struct rte_flow_error *error)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       /* Nothing to read from a flow created without the COUNT action. */
+       if (!(flow->modifier & MLX5_FLOW_MOD_COUNT))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "flow does not have counter");
+
+       struct rte_flow_query_count *stats = data;
+       struct mlx5_flow_counter *cnt = flow->counter;
+       /* raw[0] receives the packets count, raw[1] the bytes count. */
+       uint64_t raw[2] = {0, 0};
+       struct ibv_query_counter_set_attr attr = {
+               .cs = cnt->cs,
+               .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+       };
+       struct ibv_counter_set_data out = {
+               .out = raw,
+               .outlen = sizeof(raw),
+       };
+       int ret = mlx5_glue->query_counter_set(&attr, &out);
+
+       if (ret)
+               return rte_flow_error_set(error, ret,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "cannot read counter");
+       stats->hits_set = 1;
+       stats->bytes_set = 1;
+       stats->hits = raw[0] - cnt->hits;
+       stats->bytes = raw[1] - cnt->bytes;
+       if (stats->reset) {
+               cnt->hits = raw[0];
+               cnt->bytes = raw[1];
+       }
+       return 0;
+#endif
+       return rte_flow_error_set(error, ENOTSUP,
+                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                 NULL,
+                                 "counters are not available");
+}
+
+/**
+ * Query a flow.
+ *
+ * Actions are processed in order up to the END action: VOID is ignored,
+ * COUNT is forwarded to mlx5_flow_query_count() and anything else is
+ * rejected.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+               struct rte_flow *flow,
+               const struct rte_flow_action *actions,
+               void *data,
+               struct rte_flow_error *error)
+{
+       const struct rte_flow_action *act;
+
+       for (act = actions; act->type != RTE_FLOW_ACTION_TYPE_END; ++act) {
+               int ret;
+
+               if (act->type == RTE_FLOW_ACTION_TYPE_VOID)
+                       continue;
+               if (act->type != RTE_FLOW_ACTION_TYPE_COUNT)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 act,
+                                                 "action not supported");
+               ret = mlx5_flow_query_count(flow, data, error);
+               if (ret < 0)
+                       return ret;
+       }
+       return 0;
+}
+
 /**
  * Convert a flow director filter to a generic flow.
  *