net/mlx5: add workaround for VLAN in virtual machine
authorViacheslav Ovsiienko <viacheslavo@mellanox.com>
Tue, 30 Jul 2019 09:20:24 +0000 (09:20 +0000)
committerRaslan Darawsheh <rasland@mellanox.com>
Tue, 6 Aug 2019 15:42:12 +0000 (17:42 +0200)
On some virtual setups (particularly on ESXi) with SR-IOV and E-Switch
enabled, there is a problem receiving VLAN traffic on VF interfaces.
The NIC driver in the ESXi hypervisor does not set up the E-Switch vport
settings correctly, and VLAN traffic targeted to the VF is dropped.

The patch provides a temporary workaround: if a rule containing
a VLAN pattern is being installed for a VF, a VLAN network interface
is created over the VF, as the following command would do:

  ip link add link vf.if name evmlx.1.100 type vlan id 100

The PMD in DPDK maintains the database of created VLAN interfaces
for each existing VF and requested VLAN tags. When all of the RTE
Flows using the given VLAN tag are removed the created VLAN interface
with this VLAN tag is deleted.

The name of created VLAN interface follows the format:

  evmlx.d1.d2, where d1 is the VF interface ifindex and d2 is the VLAN tag

Implementation limitations:

- mask in rules is ignored, rule must specify VLAN tags exactly,
  no wildcards (which are implemented by the masks) are allowed

- virtual environment is detected via rte_hypervisor_get() call,
  and the type of hypervisor is checked. Currently we engage
  the workaround for ESXi and unrecognized hypervisors (which
  always happens on platforms other than x86 - it means the workaround
  is applied for Flows over a PCI VF). There is no confirmed data
  that the other hypervisors (HyperV, Qemu) need this workaround;
  we are trying to reduce the list of configurations to which
  the workaround should be applied.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_flow.h
drivers/net/mlx5/mlx5_flow_dv.c
drivers/net/mlx5/mlx5_flow_verbs.c
drivers/net/mlx5/mlx5_nl.c

index 3a345c7..f5bc31f 100644 (file)
@@ -843,6 +843,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                close(priv->nl_socket_route);
        if (priv->nl_socket_rdma >= 0)
                close(priv->nl_socket_rdma);
+       if (priv->vmwa_context)
+               mlx5_vlan_vmwa_exit(priv->vmwa_context);
        if (priv->sh) {
                /*
                 * Free the shared context in last turn, because the cleanup
@@ -1990,6 +1992,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        mlx5_set_min_inline(spawn, &config);
        /* Store device configuration on private structure. */
        priv->config = config;
+       /* Create context for virtual machine VLAN workaround. */
+       priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
        if (config.dv_flow_en) {
                err = mlx5_alloc_shared_dr(priv);
                if (err)
@@ -2016,6 +2020,8 @@ error:
                        close(priv->nl_socket_route);
                if (priv->nl_socket_rdma >= 0)
                        close(priv->nl_socket_rdma);
+               if (priv->vmwa_context)
+                       mlx5_vlan_vmwa_exit(priv->vmwa_context);
                if (own_domain_id)
                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
                rte_free(priv);
index e812374..caf2491 100644 (file)
@@ -355,6 +355,30 @@ enum mlx5_verbs_alloc_type {
        MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
 };
 
+/*
+ * VLAN netdev for VLAN workaround.
+ * One entry describes the kernel VLAN interface created over the VF
+ * for a single VLAN tag and the number of flows currently using it.
+ */
+struct mlx5_vlan_dev {
+       uint32_t refcnt; /**< Count of flows holding this device. */
+       uint32_t ifindex; /**< Own interface index, 0 if not created. */
+};
+
+/*
+ * Structure for VF VLAN workaround.
+ * Embedded into the per-device flow to remember which VLAN tag
+ * the flow matches on and whether it holds a VLAN device reference.
+ */
+struct mlx5_vf_vlan {
+       uint32_t tag:12; /**< VLAN ID taken from the flow VLAN item. */
+       uint32_t created:1; /**< Set while a VLAN device is acquired. */
+};
+
+/*
+ * Array of VLAN devices created on the base of VF
+ * used for workaround in virtual environments.
+ * The context also carries the Netlink socket and sequence number
+ * used to create and delete those devices.
+ */
+struct mlx5_vlan_vmwa_context {
+       int nl_socket; /**< NETLINK_ROUTE socket for link requests. */
+       uint32_t nl_sn; /**< Netlink message sequence number. */
+       uint32_t vf_ifindex; /**< Base (VF) interface index. */
+       struct rte_eth_dev *dev; /**< Device the context belongs to. */
+       struct mlx5_vlan_dev vlan_dev[4096]; /**< Indexed by VLAN tag. */
+};
+
 /**
  * Verbs allocator needs a context to know in the callback which kind of
  * resources it is allocating.
@@ -631,6 +655,7 @@ struct mlx5_priv {
        int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
        uint32_t nl_sn; /* Netlink message sequence number. */
        LIST_HEAD(dbrpage, mlx5_devx_dbr_page) dbrpgs; /* Door-bell pages. */
+       struct mlx5_vlan_vmwa_context *vmwa_context; /* VLAN WA context. */
 #ifndef RTE_ARCH_64
        rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
        rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
@@ -830,6 +855,14 @@ unsigned int mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex);
 int mlx5_nl_switch_info(int nl, unsigned int ifindex,
                        struct mlx5_switch_info *info);
 
+struct mlx5_vlan_vmwa_context *mlx5_vlan_vmwa_init(struct rte_eth_dev *dev,
+                                                  uint32_t ifindex);
+void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *ctx);
+void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev,
+                           struct mlx5_vf_vlan *vf_vlan);
+void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
+                           struct mlx5_vf_vlan *vf_vlan);
+
 /* mlx5_devx_cmds.c */
 
 struct mlx5_devx_obj *mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
index 3d2d5fc..f40fee5 100644 (file)
@@ -1204,6 +1204,8 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
  *   Item specification.
  * @param[in] item_flags
  *   Bit-fields that holds the items detected until now.
+ * @param[in] dev
+ *   Ethernet device flow is being created on.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -1213,6 +1215,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
 int
 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
                             uint64_t item_flags,
+                            struct rte_eth_dev *dev,
                             struct rte_flow_error *error)
 {
        const struct rte_flow_item_vlan *spec = item->spec;
@@ -1247,6 +1250,25 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
                                        error);
        if (ret)
                return ret;
+       if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
+               struct mlx5_priv *priv = dev->data->dev_private;
+
+               if (priv->vmwa_context) {
+                       /*
+                        * Non-NULL context means we have a virtual machine
+                        * and SR-IOV enabled, we have to create VLAN interface
+                        * to make hypervisor to setup E-Switch vport
+                        * context correctly. We avoid creating the multiple
+                        * VLAN interfaces, so we cannot support VLAN tag mask.
+                        */
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 item,
+                                                 "VLAN tag mask is not"
+                                                 " supported in virtual"
+                                                 " environment");
+               }
+       }
        if (spec) {
                vlan_tag = spec->tci;
                vlan_tag &= mask->tci;
index 24da74b..822ff36 100644 (file)
@@ -330,6 +330,8 @@ struct mlx5_flow_dv {
        /**< Pointer to the jump action resource. */
        struct mlx5_flow_dv_port_id_action_resource *port_id_action;
        /**< Pointer to port ID action resource. */
+       struct mlx5_vf_vlan vf_vlan;
+       /**< Structure for VF VLAN workaround. */
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
        void *actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
        /**< Action list. */
@@ -355,6 +357,8 @@ struct mlx5_flow_verbs {
        struct ibv_flow *flow; /**< Verbs flow pointer. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
        uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
+       struct mlx5_vf_vlan vf_vlan;
+       /**< Structure for VF VLAN workaround. */
 };
 
 /** Device flow structure. */
@@ -505,6 +509,7 @@ int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
                                struct rte_flow_error *error);
 int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
                                 uint64_t item_flags,
+                                struct rte_eth_dev *dev,
                                 struct rte_flow_error *error);
 int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
                                  uint64_t item_flags,
index 59ef716..9c0a261 100644 (file)
@@ -2892,7 +2892,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
                        ret = mlx5_flow_validate_item_vlan(items, item_flags,
-                                                          error);
+                                                          dev, error);
                        if (ret < 0)
                                return ret;
                        last_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
@@ -3450,6 +3450,8 @@ flow_dv_translate_item_eth(void *matcher, void *key,
 /**
  * Add VLAN item to matcher and to the value.
  *
+ * @param[in, out] dev_flow
+ *   Flow descriptor.
  * @param[in, out] matcher
  *   Flow matcher.
  * @param[in, out] key
@@ -3460,7 +3462,8 @@ flow_dv_translate_item_eth(void *matcher, void *key,
  *   Item is inner pattern.
  */
 static void
-flow_dv_translate_item_vlan(void *matcher, void *key,
+flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,
+                           void *matcher, void *key,
                            const struct rte_flow_item *item,
                            int inner)
 {
@@ -3487,6 +3490,12 @@ flow_dv_translate_item_vlan(void *matcher, void *key,
                headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
                                         outer_headers);
                headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+               /*
+                * This is workaround, masks are not supported,
+                * and pre-validated.
+                */
+               dev_flow->dv.vf_vlan.tag =
+                       rte_be_to_cpu_16(vlan_v->tci) & 0x0fff;
        }
        tci_m = rte_be_to_cpu_16(vlan_m->tci);
        tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);
@@ -4995,7 +5004,8 @@ cnt_err:
                                             MLX5_FLOW_LAYER_OUTER_L2;
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
-                       flow_dv_translate_item_vlan(match_mask, match_value,
+                       flow_dv_translate_item_vlan(dev_flow,
+                                                   match_mask, match_value,
                                                    items, tunnel);
                        matcher.priority = MLX5_PRIORITY_MAP_L2;
                        last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
@@ -5211,6 +5221,17 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                           "hardware refuses to create flow");
                        goto error;
                }
+               if (priv->vmwa_context &&
+                   dev_flow->dv.vf_vlan.tag &&
+                   !dev_flow->dv.vf_vlan.created) {
+                       /*
+                        * The rule contains the VLAN pattern.
+                        * For VF we are going to create VLAN
+                        * interface to make hypervisor set correct
+                        * e-Switch vport context.
+                        */
+                       mlx5_vlan_vmwa_acquire(dev, &dev_flow->dv.vf_vlan);
+               }
        }
        return 0;
 error:
@@ -5224,6 +5245,9 @@ error:
                                mlx5_hrxq_release(dev, dv->hrxq);
                        dv->hrxq = NULL;
                }
+               if (dev_flow->dv.vf_vlan.tag &&
+                   dev_flow->dv.vf_vlan.created)
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
        }
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
@@ -5424,6 +5448,9 @@ flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
                                mlx5_hrxq_release(dev, dv->hrxq);
                        dv->hrxq = NULL;
                }
+               if (dev_flow->dv.vf_vlan.tag &&
+                   dev_flow->dv.vf_vlan.created)
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
        }
 }
 
index fd6f2d5..c5b28e3 100644 (file)
@@ -391,6 +391,9 @@ flow_verbs_translate_item_vlan(struct mlx5_flow *dev_flow,
                flow_verbs_spec_add(&dev_flow->verbs, &eth, size);
        else
                flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+       if (!tunnel)
+               dev_flow->verbs.vf_vlan.tag =
+                       rte_be_to_cpu_16(spec->tci) & 0x0fff;
 }
 
 /**
@@ -1054,7 +1057,7 @@ flow_verbs_validate(struct rte_eth_dev *dev,
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
                        ret = mlx5_flow_validate_item_vlan(items, item_flags,
-                                                          error);
+                                                          dev, error);
                        if (ret < 0)
                                return ret;
                        last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
@@ -1592,6 +1595,10 @@ flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
                                mlx5_hrxq_release(dev, verbs->hrxq);
                        verbs->hrxq = NULL;
                }
+               if (dev_flow->verbs.vf_vlan.tag &&
+                   dev_flow->verbs.vf_vlan.created) {
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
 }
 
@@ -1639,6 +1646,7 @@ static int
 flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                 struct rte_flow_error *error)
 {
+       struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_verbs *verbs;
        struct mlx5_flow *dev_flow;
        int err;
@@ -1688,6 +1696,17 @@ flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                           "hardware refuses to create flow");
                        goto error;
                }
+               if (priv->vmwa_context &&
+                   dev_flow->verbs.vf_vlan.tag &&
+                   !dev_flow->verbs.vf_vlan.created) {
+                       /*
+                        * The rule contains the VLAN pattern.
+                        * For VF we are going to create VLAN
+                        * interface to make hypervisor set correct
+                        * e-Switch vport context.
+                        */
+                       mlx5_vlan_vmwa_acquire(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
        return 0;
 error:
@@ -1701,6 +1720,10 @@ error:
                                mlx5_hrxq_release(dev, verbs->hrxq);
                        verbs->hrxq = NULL;
                }
+               if (dev_flow->verbs.vf_vlan.tag &&
+                   dev_flow->verbs.vf_vlan.created) {
+                       mlx5_vlan_vmwa_release(dev, &dev_flow->verbs.vf_vlan);
+               }
        }
        rte_errno = err; /* Restore rte_errno. */
        return -rte_errno;
index 5773fa7..f0f57de 100644 (file)
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <stdalign.h>
 #include <string.h>
 #include <sys/socket.h>
 #include <unistd.h>
 
 #include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_hypervisor.h>
 
 #include "mlx5.h"
 #include "mlx5_utils.h"
@@ -28,6 +31,8 @@
 /* Receive buffer size for the Netlink socket */
 #define MLX5_RECV_BUF_SIZE 32768
 
+/** Parameters of VLAN devices created by driver. */
+#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
 /*
  * Define NDA_RTA as defined in iproute2 sources.
  *
@@ -987,3 +992,292 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
        }
        return ret;
 }
+
+/*
+ * Remove a VLAN network device, addressed by its interface index,
+ * by sending an RTM_DELLINK request and waiting for the acknowledge.
+ * A failure is only logged, nothing is reported to the caller.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Interface index of network device to delete, zero is ignored.
+ */
+static void
+mlx5_vlan_vmwa_delete(struct mlx5_vlan_vmwa_context *vmwa,
+                     uint32_t ifindex)
+{
+       struct {
+               struct nlmsghdr nh;
+               struct ifinfomsg info;
+       } msg = {
+               .nh = {
+                       .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+                       .nlmsg_type = RTM_DELLINK,
+                       .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+               },
+               .info = {
+                       .ifi_family = AF_UNSPEC,
+                       .ifi_index = ifindex,
+               },
+       };
+       int rc;
+
+       /* Nothing to do for a zero interface index. */
+       if (!ifindex)
+               return;
+       /* Advance the sequence number, zero is skipped on wrap-around. */
+       if (!++vmwa->nl_sn)
+               ++vmwa->nl_sn;
+       rc = mlx5_nl_send(vmwa->nl_socket, &msg.nh, vmwa->nl_sn);
+       if (rc >= 0)
+               rc = mlx5_nl_recv(vmwa->nl_socket, vmwa->nl_sn, NULL, NULL);
+       if (rc < 0)
+               DRV_LOG(WARNING, "netlink: error deleting"
+                                " VLAN WA ifindex %u, %d",
+                                ifindex, rc);
+}
+
+/* Set of subroutines to build Netlink message. */
+
+/*
+ * Get the position where the next attribute is to be appended:
+ * the message start advanced by the aligned current message length.
+ */
+static struct nlattr *
+nl_msg_tail(struct nlmsghdr *nlh)
+{
+       uint8_t *base = (uint8_t *)nlh;
+
+       return (struct nlattr *)(base + NLMSG_ALIGN(nlh->nlmsg_len));
+}
+
+/*
+ * Append one attribute to the tail of a Netlink message.
+ *
+ * The attribute length field is set to the header plus the payload
+ * size, padding excluded, while the message length is advanced by
+ * the padded attribute size, so the next attribute starts aligned.
+ * Padding bytes are not written here.
+ *
+ * @param[in, out] nlh
+ *   Message being built, the buffer behind it must have enough room
+ *   for the padded attribute.
+ * @param[in] type
+ *   Attribute type.
+ * @param[in] data
+ *   Attribute payload, not read if alen is zero.
+ * @param[in] alen
+ *   Payload length in bytes.
+ */
+static void
+nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
+{
+       struct nlattr *nla = nl_msg_tail(nlh);
+
+       nla->nla_type = type;
+       /* Length reported in the attribute must not include the padding. */
+       nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
+       nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) +
+                        NLMSG_ALIGN(nla->nla_len);
+
+       if (alen)
+               memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
+}
+
+/*
+ * Open a nested attribute: an empty attribute of the given type is
+ * appended and its address is returned to be passed later
+ * to nl_attr_nest_end().
+ */
+static struct nlattr *
+nl_attr_nest_start(struct nlmsghdr *nlh, int type)
+{
+       struct nlattr *head = nl_msg_tail(nlh);
+
+       nl_attr_put(nlh, type, NULL, 0);
+       return head;
+}
+
+/*
+ * Close a nested attribute: its length is set to the distance
+ * from the nest header to the current tail of the message.
+ */
+static void
+nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
+{
+       const uint8_t *first = (const uint8_t *)nest;
+       const uint8_t *last = (const uint8_t *)nl_msg_tail(nlh);
+
+       nest->nla_len = last - first;
+}
+
+/*
+ * Create network VLAN device with specified VLAN tag.
+ *
+ * An RTM_NEWLINK request is built on the stack and sent over the
+ * context Netlink socket. The device name is formatted as
+ * "<MLX5_VMWA_VLAN_DEVICE_PFX>.<ifindex>.<tag>". A Netlink failure
+ * is only logged: the interface is looked up by name afterwards
+ * in any case, so a device with this name which already exists
+ * is reported as well.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Base network interface index.
+ * @param[in] tag
+ *   VLAN tag for VLAN network device to create.
+ *
+ * @return
+ *   Interface index of the VLAN device found by name after the request,
+ *   0 if the name can not be resolved.
+ */
+static uint32_t
+mlx5_vlan_vmwa_create(struct mlx5_vlan_vmwa_context *vmwa,
+                     uint32_t ifindex,
+                     uint16_t tag)
+{
+       struct nlmsghdr *nlh;
+       struct ifinfomsg *ifm;
+       char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
+
+       alignas(RTE_CACHE_LINE_SIZE)
+       uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+                   NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
+                   NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
+                   NLMSG_ALIGN(sizeof(uint32_t)) +
+                   NLMSG_ALIGN(sizeof(name)) +
+                   NLMSG_ALIGN(sizeof("vlan")) +
+                   NLMSG_ALIGN(sizeof(uint32_t)) +
+                   NLMSG_ALIGN(sizeof(uint16_t)) + 16];
+       struct nlattr *na_info;
+       struct nlattr *na_vlan;
+       int ret;
+
+       memset(buf, 0, sizeof(buf));
+       ++vmwa->nl_sn;
+       if (!vmwa->nl_sn)
+               ++vmwa->nl_sn;
+       nlh = (struct nlmsghdr *)buf;
+       nlh->nlmsg_len = sizeof(struct nlmsghdr);
+       nlh->nlmsg_type = RTM_NEWLINK;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+                          NLM_F_EXCL | NLM_F_ACK;
+       ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
+       nlh->nlmsg_len += sizeof(struct ifinfomsg);
+       ifm->ifi_family = AF_UNSPEC;
+       ifm->ifi_type = 0;
+       ifm->ifi_index = 0;
+       ifm->ifi_flags = IFF_UP;
+       ifm->ifi_change = 0xffffffff;
+       nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
+       ret = snprintf(name, sizeof(name), "%s.%u.%u",
+                      MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
+       nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
+       na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
+       nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
+       na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
+       nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
+       nl_attr_nest_end(nlh, na_vlan);
+       nl_attr_nest_end(nlh, na_info);
+       assert(sizeof(buf) >= nlh->nlmsg_len);
+       ret = mlx5_nl_send(vmwa->nl_socket, nlh, vmwa->nl_sn);
+       if (ret >= 0)
+               ret = mlx5_nl_recv(vmwa->nl_socket, vmwa->nl_sn, NULL, NULL);
+       if (ret < 0) {
+               DRV_LOG(WARNING,
+                       "netlink: VLAN %s create failure (%d)",
+                       name, ret);
+       }
+       /* Try to get ifindex of created or pre-existing device. */
+       ret = if_nametoindex(name);
+       if (!ret) {
+               DRV_LOG(WARNING,
+                       "VLAN %s failed to get index (%d)",
+                       name, errno);
+               return 0;
+       }
+       return ret;
+}
+
+/*
+ * Release VLAN network device, created for VM workaround.
+ *
+ * The reference held by the flow is dropped. When the last reference
+ * for the tag is gone the kernel VLAN interface is deleted and its
+ * stored interface index is cleared. Nothing is done if the flow holds
+ * no reference or there is no workaround context.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in, out] vlan
+ *   Object representing the network device to release,
+ *   its "created" flag is cleared.
+ */
+void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev,
+                           struct mlx5_vf_vlan *vlan)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+       struct mlx5_vlan_dev *vlan_dev;
+
+       assert(vlan->created);
+       assert(priv->vmwa_context);
+       if (!vlan->created || !vmwa)
+               return;
+       /* The entry is derived only once the context is known to exist. */
+       vlan_dev = &vmwa->vlan_dev[vlan->tag];
+       vlan->created = 0;
+       assert(vlan_dev->refcnt);
+       if (--vlan_dev->refcnt == 0 && vlan_dev->ifindex) {
+               mlx5_vlan_vmwa_delete(vmwa, vlan_dev->ifindex);
+               vlan_dev->ifindex = 0;
+       }
+}
+
+/**
+ * Acquire VLAN interface with specified tag for VM workaround.
+ *
+ * The kernel VLAN interface is created on the first reference for
+ * the tag. The reference is taken, and the "created" flag is set,
+ * only if the interface index for the tag is known. Nothing is done
+ * if the flow already holds a reference or there is no workaround
+ * context.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in, out] vlan
+ *   Object representing the network device to acquire,
+ *   its "created" flag is set on success.
+ */
+void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
+                           struct mlx5_vf_vlan *vlan)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+       struct mlx5_vlan_dev *vlan_dev;
+
+       assert(!vlan->created);
+       assert(priv->vmwa_context);
+       if (vlan->created || !vmwa)
+               return;
+       /* The entry is derived only once the context is known to exist. */
+       vlan_dev = &vmwa->vlan_dev[vlan->tag];
+       if (vlan_dev->refcnt == 0) {
+               assert(!vlan_dev->ifindex);
+               vlan_dev->ifindex =
+                       mlx5_vlan_vmwa_create(vmwa,
+                                             vmwa->vf_ifindex,
+                                             vlan->tag);
+       }
+       if (vlan_dev->ifindex) {
+               vlan_dev->refcnt++;
+               vlan->created = 1;
+       }
+}
+
+/*
+ * Create per ethernet device VLAN VM workaround context
+ *
+ * The context is created only for a VF device and only if the
+ * hypervisor type reported by rte_hypervisor_get() is either
+ * unknown or VMware. Failures are logged as warnings and reported
+ * by returning NULL, which means the workaround is not engaged.
+ *
+ * @param[in] dev
+ *   Ethernet device object, its private configuration is read.
+ * @param[in] ifindex
+ *   Interface index stored as the base for the VLAN devices.
+ *
+ * @return
+ *   Pointer to the zero-initialized context with an open Netlink
+ *   socket, NULL if the workaround is not needed or can not be set up.
+ */
+struct mlx5_vlan_vmwa_context *
+mlx5_vlan_vmwa_init(struct rte_eth_dev *dev,
+                   uint32_t ifindex)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_dev_config *config = &priv->config;
+       struct mlx5_vlan_vmwa_context *vmwa;
+       enum rte_hypervisor hv_type;
+
+       /* Do not engage workaround over PF. */
+       if (!config->vf)
+               return NULL;
+       /* Check whether there is desired virtual environment */
+       hv_type = rte_hypervisor_get();
+       switch (hv_type) {
+       case RTE_HYPERVISOR_UNKNOWN:
+       case RTE_HYPERVISOR_VMWARE:
+               /*
+                * The "white list" of configurations
+                * to engage the workaround.
+                */
+               break;
+       default:
+               /*
+                * The configuration is not found in the "white list".
+                * We should not engage the VLAN workaround.
+                */
+               return NULL;
+       }
+       vmwa = rte_zmalloc(__func__, sizeof(*vmwa), sizeof(uint32_t));
+       if (!vmwa) {
+               DRV_LOG(WARNING,
+                       "Can not allocate memory"
+                       " for VLAN workaround context");
+               return NULL;
+       }
+       vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+       if (vmwa->nl_socket < 0) {
+               DRV_LOG(WARNING,
+                       "Can not create Netlink socket"
+                       " for VLAN workaround context");
+               rte_free(vmwa);
+               return NULL;
+       }
+       vmwa->nl_sn = random();
+       vmwa->vf_ifindex = ifindex;
+       vmwa->dev = dev;
+       /*
+        * NOTE(review): cleanup for existing VLAN devices is mentioned
+        * here but no such cleanup is performed by this routine.
+        */
+       return vmwa;
+}
+
+/*
+ * Destroy per ethernet device VLAN VM workaround context:
+ * every VLAN device with a known interface index is deleted,
+ * the Netlink socket is closed and the context memory is freed.
+ *
+ * @param[in] vmwa
+ *   Context object to destroy, must not be NULL.
+ */
+void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *vmwa)
+{
+       unsigned int tag;
+
+       /* Walk over all the tags and drop the devices still present. */
+       for (tag = 0; tag != RTE_DIM(vmwa->vlan_dev); ++tag) {
+               uint32_t ifindex = vmwa->vlan_dev[tag].ifindex;
+
+               if (!ifindex)
+                       continue;
+               mlx5_vlan_vmwa_delete(vmwa, ifindex);
+       }
+       if (vmwa->nl_socket >= 0)
+               close(vmwa->nl_socket);
+       rte_free(vmwa);
+}