net/mlx5: skip table zero to improve insertion rate
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_verbs.c
index 699cc88..23110f2 100644 (file)
@@ -20,7 +20,6 @@
 
 #include <rte_common.h>
 #include <rte_ether.h>
-#include <rte_eth_ctrl.h>
 #include <rte_ethdev_driver.h>
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
        (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
@@ -55,23 +55,26 @@ flow_verbs_counter_create(struct rte_eth_dev *dev,
                          struct mlx5_flow_counter *counter)
 {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
-       struct priv *priv = dev->data->dev_private;
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct ibv_context *ctx = priv->sh->ctx;
        struct ibv_counter_set_init_attr init = {
                         .counter_set_id = counter->id};
 
-       counter->cs = mlx5_glue->create_counter_set(priv->ctx, &init);
+       counter->cs = mlx5_glue->create_counter_set(ctx, &init);
        if (!counter->cs) {
                rte_errno = ENOTSUP;
                return -ENOTSUP;
        }
        return 0;
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-       struct priv *priv = dev->data->dev_private;
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct ibv_context *ctx = priv->sh->ctx;
        struct ibv_counters_init_attr init = {0};
-       struct ibv_counter_attach_attr attach = {0};
+       struct ibv_counter_attach_attr attach;
        int ret;
 
-       counter->cs = mlx5_glue->create_counters(priv->ctx, &init);
+       memset(&attach, 0, sizeof(attach));
+       counter->cs = mlx5_glue->create_counters(ctx, &init);
        if (!counter->cs) {
                rte_errno = ENOTSUP;
                return -ENOTSUP;
@@ -116,17 +119,17 @@ flow_verbs_counter_create(struct rte_eth_dev *dev,
 static struct mlx5_flow_counter *
 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 {
-       struct priv *priv = dev->data->dev_private;
+       struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter *cnt;
        int ret;
 
-       LIST_FOREACH(cnt, &priv->flow_counters, next) {
-               if (!cnt->shared || cnt->shared != shared)
-                       continue;
-               if (cnt->id != id)
-                       continue;
-               cnt->ref_cnt++;
-               return cnt;
+       if (shared) {
+               TAILQ_FOREACH(cnt, &priv->sh->cmng.flow_counters, next) {
+                       if (cnt->shared && cnt->id == id) {
+                               cnt->ref_cnt++;
+                               return cnt;
+                       }
+               }
        }
        cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
        if (!cnt) {
@@ -141,7 +144,7 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
        /* Create counter with Verbs. */
        ret = flow_verbs_counter_create(dev, cnt);
        if (!ret) {
-               LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+               TAILQ_INSERT_HEAD(&priv->sh->cmng.flow_counters, cnt, next);
                return cnt;
        }
        /* Some error occurred in Verbs library. */
@@ -153,19 +156,24 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 /**
  * Release a flow counter.
  *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
  * @param[in] counter
  *   Pointer to the counter handler.
  */
 static void
-flow_verbs_counter_release(struct mlx5_flow_counter *counter)
+flow_verbs_counter_release(struct rte_eth_dev *dev,
+                          struct mlx5_flow_counter *counter)
 {
+       struct mlx5_priv *priv = dev->data->dev_private;
+
        if (--counter->ref_cnt == 0) {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
                claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
                claim_zero(mlx5_glue->destroy_counters(counter->cs));
 #endif
-               LIST_REMOVE(counter, next);
+               TAILQ_REMOVE(&priv->sh->cmng.flow_counters, counter, next);
                rte_free(counter);
        }
 }
@@ -287,14 +295,18 @@ flow_verbs_translate_item_eth(struct mlx5_flow *dev_flow,
        if (spec) {
                unsigned int i;
 
-               memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
-               memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+               memcpy(&eth.val.dst_mac, spec->dst.addr_bytes,
+                       RTE_ETHER_ADDR_LEN);
+               memcpy(&eth.val.src_mac, spec->src.addr_bytes,
+                       RTE_ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
-               memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
-               memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+               memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes,
+                       RTE_ETHER_ADDR_LEN);
+               memcpy(&eth.mask.src_mac, mask->src.addr_bytes,
+                       RTE_ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
-               for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+               for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
@@ -379,6 +391,9 @@ flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
                flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
        else
                flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+       if (!tunnel)
+               dev_flow->verbs.vf_vlan.tag =
+                       rte_be_to_cpu_16(spec->tci) & 0x0fff;
 }
 
 /**
@@ -473,17 +488,17 @@ flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
                vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
                vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
                ipv6.val.flow_label =
-                       rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
-                                        IPV6_HDR_FL_SHIFT);
-               ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
-                                        IPV6_HDR_TC_SHIFT;
+                       rte_cpu_to_be_32((vtc_flow_val & RTE_IPV6_HDR_FL_MASK) >>
+                                        RTE_IPV6_HDR_FL_SHIFT);
+               ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >>
+                                        RTE_IPV6_HDR_TC_SHIFT;
                ipv6.val.next_hdr = spec->hdr.proto;
                ipv6.val.hop_limit = spec->hdr.hop_limits;
                ipv6.mask.flow_label =
-                       rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
-                                        IPV6_HDR_FL_SHIFT);
-               ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
-                                         IPV6_HDR_TC_SHIFT;
+                       rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >>
+                                        RTE_IPV6_HDR_FL_SHIFT);
+               ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >>
+                                         RTE_IPV6_HDR_TC_SHIFT;
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
                /* Remove unwanted bits from values. */
@@ -1001,6 +1016,8 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow,
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -1012,11 +1029,13 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                    const struct rte_flow_attr *attr,
                    const struct rte_flow_item items[],
                    const struct rte_flow_action actions[],
+                   bool external __rte_unused,
                    struct rte_flow_error *error)
 {
        int ret;
        uint64_t action_flags = 0;
        uint64_t item_flags = 0;
+       uint64_t last_item = 0;
        uint8_t next_protocol = 0xff;
 
        if (items == NULL)
@@ -1036,46 +1055,60 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                                                          error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-                                              MLX5_FLOW_LAYER_OUTER_L2;
+                       last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                                            MLX5_FLOW_LAYER_OUTER_L2;
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
                        ret = mlx5_flow_validate_item_vlan(items, item_flags,
-                                                          error);
+                                                          dev, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
-                                               MLX5_FLOW_LAYER_INNER_VLAN) :
-                                              (MLX5_FLOW_LAYER_OUTER_L2 |
-                                               MLX5_FLOW_LAYER_OUTER_VLAN);
+                       last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
+                                             MLX5_FLOW_LAYER_INNER_VLAN) :
+                                            (MLX5_FLOW_LAYER_OUTER_L2 |
+                                             MLX5_FLOW_LAYER_OUTER_VLAN);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
                        ret = mlx5_flow_validate_item_ipv4(items, item_flags,
-                                                          error);
+                                                          NULL, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                            MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                        if (items->mask != NULL &&
                            ((const struct rte_flow_item_ipv4 *)
-                            items->mask)->hdr.next_proto_id)
+                            items->mask)->hdr.next_proto_id) {
                                next_protocol =
                                        ((const struct rte_flow_item_ipv4 *)
                                         (items->spec))->hdr.next_proto_id;
+                               next_protocol &=
+                                       ((const struct rte_flow_item_ipv4 *)
+                                        (items->mask))->hdr.next_proto_id;
+                       } else {
+                               /* Reset for inner layer. */
+                               next_protocol = 0xff;
+                       }
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
                        ret = mlx5_flow_validate_item_ipv6(items, item_flags,
-                                                          error);
+                                                          NULL, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                            MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        if (items->mask != NULL &&
                            ((const struct rte_flow_item_ipv6 *)
-                            items->mask)->hdr.proto)
+                            items->mask)->hdr.proto) {
                                next_protocol =
                                        ((const struct rte_flow_item_ipv6 *)
                                         items->spec)->hdr.proto;
+                               next_protocol &=
+                                       ((const struct rte_flow_item_ipv6 *)
+                                        items->mask)->hdr.proto;
+                       } else {
+                               /* Reset for inner layer. */
+                               next_protocol = 0xff;
+                       }
                        break;
                case RTE_FLOW_ITEM_TYPE_UDP:
                        ret = mlx5_flow_validate_item_udp(items, item_flags,
@@ -1083,8 +1116,8 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                                                          error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                            MLX5_FLOW_LAYER_OUTER_L4_UDP;
                        break;
                case RTE_FLOW_ITEM_TYPE_TCP:
                        ret = mlx5_flow_validate_item_tcp
@@ -1094,15 +1127,15 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                                                 error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                            MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
                        ret = mlx5_flow_validate_item_vxlan(items, item_flags,
                                                            error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       last_item = MLX5_FLOW_LAYER_VXLAN;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
                        ret = mlx5_flow_validate_item_vxlan_gpe(items,
@@ -1110,35 +1143,29 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                                                                dev, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+                       last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
                        break;
                case RTE_FLOW_ITEM_TYPE_GRE:
                        ret = mlx5_flow_validate_item_gre(items, item_flags,
                                                          next_protocol, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_GRE;
+                       last_item = MLX5_FLOW_LAYER_GRE;
                        break;
                case RTE_FLOW_ITEM_TYPE_MPLS:
-                       ret = mlx5_flow_validate_item_mpls(items, item_flags,
-                                                          next_protocol,
-                                                          error);
+                       ret = mlx5_flow_validate_item_mpls(dev, items,
+                                                          item_flags,
+                                                          last_item, error);
                        if (ret < 0)
                                return ret;
-                       if (next_protocol != 0xff &&
-                           next_protocol != IPPROTO_MPLS)
-                               return rte_flow_error_set
-                                       (error, EINVAL,
-                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
-                                        "protocol filtering not compatible"
-                                        " with MPLS layer");
-                       item_flags |= MLX5_FLOW_LAYER_MPLS;
+                       last_item = MLX5_FLOW_LAYER_MPLS;
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "item not supported");
                }
+               item_flags |= last_item;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
                switch (actions->type) {
@@ -1181,7 +1208,7 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                case RTE_FLOW_ACTION_TYPE_RSS:
                        ret = mlx5_flow_validate_action_rss(actions,
                                                            action_flags, dev,
-                                                           attr,
+                                                           attr, item_flags,
                                                            error);
                        if (ret < 0)
                                return ret;
@@ -1373,7 +1400,7 @@ flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, else a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, else a negative errno value otherwise and rte_errno is set.
  */
 static int
 flow_verbs_translate(struct rte_eth_dev *dev,
@@ -1388,7 +1415,7 @@ flow_verbs_translate(struct rte_eth_dev *dev,
        uint64_t action_flags = 0;
        uint64_t priority = attr->priority;
        uint32_t subpriority = 0;
-       struct priv *priv = dev->data->dev_private;
+       struct mlx5_priv *priv = dev->data->dev_private;
 
        if (priority == MLX5_FLOW_PRIO_RSVD)
                priority = priv->config.flow_prio - 1;
@@ -1538,6 +1565,7 @@ flow_verbs_translate(struct rte_eth_dev *dev,
        dev_flow->layers = item_flags;
        dev_flow->verbs.attr->priority =
                mlx5_flow_adjust_priority(dev, priority, subpriority);
+       dev_flow->verbs.attr->port = (uint8_t)priv->ibv_port;
        return 0;
 }
 
@@ -1570,6 +1598,10 @@ flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
                                mlx5_hrxq_release(dev, verbs->hrxq);
                        verbs->hrxq = NULL;
                }
+               if (dev_flow->verbs.vf_vlan.tag &&
+                   dev_flow->verbs.vf_vlan.created) {
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
 }
 
@@ -1595,7 +1627,7 @@ flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
                rte_free(dev_flow);
        }
        if (flow->counter) {
-               flow_verbs_counter_release(flow->counter);
+               flow_verbs_counter_release(dev, flow->counter);
                flow->counter = NULL;
        }
 }
@@ -1617,6 +1649,7 @@ static int
 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                 struct rte_flow_error *error)
 {
+       struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_verbs *verbs;
        struct mlx5_flow *dev_flow;
        int err;
@@ -1647,7 +1680,7 @@ flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                                     (*flow->queue),
                                                     flow->rss.queue_num,
                                                     !!(dev_flow->layers &
-                                                     MLX5_FLOW_LAYER_TUNNEL));
+                                                      MLX5_FLOW_LAYER_TUNNEL));
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, rte_errno,
@@ -1666,6 +1699,17 @@ flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                           "hardware refuses to create flow");
                        goto error;
                }
+               if (priv->vmwa_context &&
+                   dev_flow->verbs.vf_vlan.tag &&
+                   !dev_flow->verbs.vf_vlan.created) {
+                       /*
+                        * The rule contains the VLAN pattern.
+                        * For VF we are going to create VLAN
+                        * interface to make hypervisor set correct
+                        * e-Switch vport context.
+                        */
+                       mlx5_vlan_vmwa_acquire(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
        return 0;
 error:
@@ -1679,6 +1723,10 @@ error:
                                mlx5_hrxq_release(dev, verbs->hrxq);
                        verbs->hrxq = NULL;
                }
+               if (dev_flow->verbs.vf_vlan.tag &&
+                   dev_flow->verbs.vf_vlan.created) {
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;