X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_nl.c;h=3e073c6e29a5df12884dc7bc96c1d81b677327b7;hb=c9333f457eeb708375dd0c8592079da8cfc174cd;hp=5773fa75c65739fb7d31710466323accbb41ef87;hpb=35b2d13fd6fdcbd191f2a30d74648faeb1186c65;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index 5773fa75c6..3e073c6e29 100644 --- a/drivers/net/mlx5/mlx5_nl.c +++ b/drivers/net/mlx5/mlx5_nl.c @@ -12,11 +12,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include "mlx5.h" #include "mlx5_utils.h" @@ -28,6 +31,8 @@ /* Receive buffer size for the Netlink socket */ #define MLX5_RECV_BUF_SIZE 32768 +/** Parameters of VLAN devices created by driver. */ +#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" /* * Define NDA_RTA as defined in iproute2 sources. * @@ -85,12 +90,18 @@ struct mlx5_nl_mac_addr { int mac_n; /**< Number of addresses in the array. */ }; +#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) +#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) +#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) +#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) + /** Data structure used by mlx5_nl_cmdget_cb(). */ struct mlx5_nl_ifindex_data { const char *name; /**< IB device name (in). */ + uint32_t flags; /**< found attribute flags (out). */ uint32_t ibindex; /**< IB device index (out). */ uint32_t ifindex; /**< Network interface index (out). */ - uint32_t portnum; /**< IB device max port number. */ + uint32_t portnum; /**< IB device max port number (out). 
*/ }; /** @@ -699,11 +710,10 @@ static int mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) { struct mlx5_nl_ifindex_data *data = arg; + struct mlx5_nl_ifindex_data local = { + .flags = 0, + }; size_t off = NLMSG_HDRLEN; - uint32_t ibindex = 0; - uint32_t ifindex = 0; - uint32_t portnum = 0; - int found = 0; if (nh->nlmsg_type != RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && @@ -718,27 +728,37 @@ mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) goto error; switch (na->nla_type) { case RDMA_NLDEV_ATTR_DEV_INDEX: - ibindex = *(uint32_t *)payload; + local.ibindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_IB_INDEX; break; case RDMA_NLDEV_ATTR_DEV_NAME: if (!strcmp(payload, data->name)) - found = 1; + local.flags |= MLX5_NL_CMD_GET_IB_NAME; break; case RDMA_NLDEV_ATTR_NDEV_INDEX: - ifindex = *(uint32_t *)payload; + local.ifindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_NET_INDEX; break; case RDMA_NLDEV_ATTR_PORT_INDEX: - portnum = *(uint32_t *)payload; + local.portnum = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; break; default: break; } off += NLA_ALIGN(na->nla_len); } - if (found) { - data->ibindex = ibindex; - data->ifindex = ifindex; - data->portnum = portnum; + /* + * It is possible to have multiple messages for all + * Infiniband devices in the system with appropriate name. + * So we should gather parameters locally and copy to + * query context only in case of coinciding device name. + */ + if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { + data->flags = local.flags; + data->ibindex = local.ibindex; + data->ifindex = local.ifindex; + data->portnum = local.portnum; } return 0; error: @@ -769,6 +789,7 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) uint32_t seq = random(); struct mlx5_nl_ifindex_data data = { .name = name, + .flags = 0, .ibindex = 0, /* Determined during first pass. */ .ifindex = 0, /* Determined during second pass. 
*/ }; @@ -794,8 +815,10 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); if (ret < 0) return 0; - if (!data.ibindex) + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) goto error; + data.flags = 0; ++seq; req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET); @@ -817,7 +840,10 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); if (ret < 0) return 0; - if (!data.ifindex) + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || + !data.ifindex) goto error; return data.ifindex; error: @@ -842,8 +868,8 @@ mlx5_nl_portnum(int nl, const char *name) { uint32_t seq = random(); struct mlx5_nl_ifindex_data data = { + .flags = 0, .name = name, - .ibindex = 0, .ifindex = 0, .portnum = 0, }; @@ -861,7 +887,9 @@ mlx5_nl_portnum(int nl, const char *name) ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); if (ret < 0) return 0; - if (!data.ibindex) { + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { rte_errno = ENODEV; return 0; } @@ -987,3 +1015,292 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) } return ret; } + +/* + * Delete VLAN network device by ifindex. + * + * @param[in] tcf + * Context object initialized by mlx5_vlan_vmwa_init(). + * @param[in] ifindex + * Interface index of network device to delete. 
+ */
+static void
+mlx5_vlan_vmwa_delete(struct mlx5_vlan_vmwa_context *vmwa,
+		      uint32_t ifindex)
+{
+	int ret;
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+			.nlmsg_type = RTM_DELLINK,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+		.info = {
+			.ifi_family = AF_UNSPEC,
+			.ifi_index = ifindex,
+		},
+	};
+
+	/* Nothing is sent for a zero ifindex. */
+	if (ifindex) {
+		/* Advance the sequence number, skipping zero on wraparound. */
+		++vmwa->nl_sn;
+		if (!vmwa->nl_sn)
+			++vmwa->nl_sn;
+		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, vmwa->nl_sn);
+		if (ret >= 0)
+			ret = mlx5_nl_recv(vmwa->nl_socket,
+					   vmwa->nl_sn,
+					   NULL, NULL);
+		/* Failure is only logged, no status is reported to caller. */
+		if (ret < 0)
+			DRV_LOG(WARNING, "netlink: error deleting"
+				" VLAN WA ifindex %u, %d",
+				ifindex, ret);
+	}
+}
+
+/* Set of subroutines to build Netlink message. */
+
+/*
+ * Get the position just past the current end of the message.
+ *
+ * @param[in] nlh
+ *   Netlink message header, nlmsg_len holds the current length.
+ *
+ * @return
+ *   Address of the message start plus NLMSG_ALIGN(nlmsg_len),
+ *   typed as the attribute to be filled next.
+ */
+static struct nlattr *
+nl_msg_tail(struct nlmsghdr *nlh)
+{
+	return (struct nlattr *)
+		(((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
+}
+
+/*
+ * Append an attribute to the message.
+ *
+ * No bounds checking is done here, the buffer behind nlh must
+ * have room for the attribute header, payload and padding.
+ *
+ * @param[in, out] nlh
+ *   Netlink message header, nlmsg_len is advanced past the attribute.
+ * @param[in] type
+ *   Attribute type.
+ * @param[in] data
+ *   Attribute payload, not read if alen is zero.
+ * @param[in] alen
+ *   Payload length in bytes.
+ */
+static void
+nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
+{
+	struct nlattr *nla = nl_msg_tail(nlh);
+
+	nla->nla_type = type;
+	/*
+	 * nla_len counts the attribute header and payload only, the
+	 * padding up to the next attribute is accounted in nlmsg_len.
+	 * Storing the padded size would announce a longer payload
+	 * than the one actually provided (e.g. 4 bytes for uint16_t).
+	 */
+	nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
+	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) +
+			 NLMSG_ALIGN(nla->nla_len);
+
+	if (alen)
+		memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
+}
+
+/*
+ * Open a nested attribute: append an attribute with no payload and
+ * return it, so its length can be fixed up by nl_attr_nest_end().
+ *
+ * @param[in, out] nlh
+ *   Netlink message header.
+ * @param[in] type
+ *   Type of the nesting attribute.
+ *
+ * @return
+ *   Pointer to the nesting attribute inside the message.
+ */
+static struct nlattr *
+nl_attr_nest_start(struct nlmsghdr *nlh, int type)
+{
+	struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
+
+	nl_attr_put(nlh, type, NULL, 0);
+	return nest;
+}
+
+/*
+ * Close a nested attribute: set its length to the distance from the
+ * nesting attribute to the current message tail.
+ *
+ * @param[in] nlh
+ *   Netlink message header.
+ * @param[in, out] nest
+ *   Attribute returned by nl_attr_nest_start().
+ */
+static void
+nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
+{
+	nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
+}
+
+/*
+ * Create network VLAN device with specified VLAN tag.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Base network interface index.
+ * @param[in] tag
+ *   VLAN tag for VLAN network device to create.
+ *
+ * @return
+ *   Interface index of the VLAN network device, 0 on failure.
+ */
+static uint32_t
+mlx5_vlan_vmwa_create(struct mlx5_vlan_vmwa_context *vmwa,
+		      uint32_t ifindex,
+		      uint16_t tag)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
+
+	alignas(RTE_CACHE_LINE_SIZE)
+	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
+		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(name)) +
+		    NLMSG_ALIGN(sizeof("vlan")) +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
+	struct nlattr *na_info;
+	struct nlattr *na_vlan;
+	unsigned int vlan_ifindex;
+	int name_len;
+	int ret;
+
+	/*
+	 * Build the device name first: its length is used as the
+	 * IFLA_IFNAME payload size below, so a failed or truncated
+	 * format must not reach the message builder.
+	 */
+	name_len = snprintf(name, sizeof(name), "%s.%u.%u",
+			    MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
+	if (name_len < 0 || (size_t)name_len >= sizeof(name)) {
+		DRV_LOG(WARNING,
+			"VLAN device name can not be built"
+			" for ifindex %u, tag %u", ifindex, tag);
+		return 0;
+	}
+	memset(buf, 0, sizeof(buf));
+	/* Advance the sequence number, skipping zero on wraparound. */
+	++vmwa->nl_sn;
+	if (!vmwa->nl_sn)
+		++vmwa->nl_sn;
+	nlh = (struct nlmsghdr *)buf;
+	nlh->nlmsg_len = sizeof(struct nlmsghdr);
+	nlh->nlmsg_type = RTM_NEWLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_EXCL | NLM_F_ACK;
+	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
+	nlh->nlmsg_len += sizeof(struct ifinfomsg);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_type = 0;
+	ifm->ifi_index = 0;
+	ifm->ifi_flags = IFF_UP;
+	ifm->ifi_change = 0xffffffff;
+	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
+	/* The name is sent with its terminating NUL. */
+	nl_attr_put(nlh, IFLA_IFNAME, name, name_len + 1);
+	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
+	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
+	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
+	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
+	nl_attr_nest_end(nlh, na_vlan);
+	nl_attr_nest_end(nlh, na_info);
+	assert(sizeof(buf) >= nlh->nlmsg_len);
+	ret = mlx5_nl_send(vmwa->nl_socket, nlh, vmwa->nl_sn);
+	if (ret >= 0)
+		ret = mlx5_nl_recv(vmwa->nl_socket, vmwa->nl_sn, NULL, NULL);
+	if (ret < 0) {
+		DRV_LOG(WARNING,
+			"netlink: VLAN %s create failure (%d)",
+			name, ret);
+	}
+	/*
+	 * Try to get ifindex of created or pre-existing device,
+	 * the lookup is done even if the Netlink request failed.
+	 */
+	vlan_ifindex = if_nametoindex(name);
+	if (!vlan_ifindex) {
+		DRV_LOG(WARNING,
+			"VLAN %s failed to get index (%d)",
+			name, errno);
+		return 0;
+	}
+	return vlan_ifindex;
+}
+
+/*
+ * Release VLAN network device, created for VM workaround.
+ *
+ * Drops one reference on the per-tag entry and deletes the VLAN
+ * network device once the reference counter reaches zero.
+ * Does nothing if vlan is not marked as created or if the device
+ * has no workaround context.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in] vlan
+ *   Object representing the network device to release.
+ */
+void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vlan)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+	struct mlx5_vlan_dev *vlan_dev;
+
+	assert(vlan->created);
+	assert(priv->vmwa_context);
+	if (!vlan->created || !vmwa)
+		return;
+	/* Touch the context only after it is known to be present. */
+	vlan_dev = &vmwa->vlan_dev[vlan->tag];
+	vlan->created = 0;
+	assert(vlan_dev->refcnt);
+	if (--vlan_dev->refcnt == 0 && vlan_dev->ifindex) {
+		mlx5_vlan_vmwa_delete(vmwa, vlan_dev->ifindex);
+		vlan_dev->ifindex = 0;
+	}
+}
+
+/**
+ * Acquire VLAN interface with specified tag for VM workaround.
+ *
+ * @param[in] dev
+ *   Ethernet device object, Netlink context provider.
+ * @param[in] vlan
+ *   Object representing the network device to acquire.
+ */
+void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev,
+			    struct mlx5_vf_vlan *vlan)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context;
+	struct mlx5_vlan_dev *vlan_dev;
+
+	assert(!vlan->created);
+	assert(priv->vmwa_context);
+	if (vlan->created || !vmwa)
+		return;
+	/* Touch the context only after it is known to be present. */
+	vlan_dev = &vmwa->vlan_dev[vlan->tag];
+	if (vlan_dev->refcnt == 0) {
+		/* No users of this tag yet, request the VLAN device. */
+		assert(!vlan_dev->ifindex);
+		vlan_dev->ifindex =
+			mlx5_vlan_vmwa_create(vmwa,
+					      vmwa->vf_ifindex,
+					      vlan->tag);
+	}
+	/* A zero ifindex leaves the entry unreferenced and vlan unmarked. */
+	if (vlan_dev->ifindex) {
+		vlan_dev->refcnt++;
+		vlan->created = 1;
+	}
+}
+
+/*
+ * Create per ethernet device VLAN VM workaround context
+ *
+ * @param[in] dev
+ *   Ethernet device object, stored in the context.
+ * @param[in] ifindex
+ *   Network interface index, stored in the context as the base
+ *   interface for the VLAN devices.
+ *
+ * @return
+ *   Pointer to the new context. NULL if the workaround is not
+ *   engaged (device is not a VF or the hypervisor is not in the
+ *   list below), or if memory allocation or Netlink socket
+ *   creation fails.
+ */
+struct mlx5_vlan_vmwa_context *
+mlx5_vlan_vmwa_init(struct rte_eth_dev *dev,
+		    uint32_t ifindex)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_config *config = &priv->config;
+	struct mlx5_vlan_vmwa_context *vmwa;
+	enum rte_hypervisor hv_type;
+
+	/* Do not engage workaround over PF. */
+	if (!config->vf)
+		return NULL;
+	/* Check whether there is desired virtual environment */
+	hv_type = rte_hypervisor_get();
+	switch (hv_type) {
+	case RTE_HYPERVISOR_UNKNOWN:
+	case RTE_HYPERVISOR_VMWARE:
+		/*
+		 * The "white list" of configurations
+		 * to engage the workaround.
+		 */
+		break;
+	default:
+		/*
+		 * The configuration is not found in the "white list".
+		 * We should not engage the VLAN workaround.
+		 */
+		return NULL;
+	}
+	vmwa = rte_zmalloc(__func__, sizeof(*vmwa), sizeof(uint32_t));
+	if (!vmwa) {
+		DRV_LOG(WARNING,
+			"Can not allocate memory"
+			" for VLAN workaround context");
+		return NULL;
+	}
+	vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE);
+	if (vmwa->nl_socket < 0) {
+		DRV_LOG(WARNING,
+			"Can not create Netlink socket"
+			" for VLAN workaround context");
+		rte_free(vmwa);
+		return NULL;
+	}
+	vmwa->nl_sn = random();
+	vmwa->vf_ifindex = ifindex;
+	vmwa->dev = dev;
+	/* No cleanup of pre-existing VLAN devices is done here. */
+	return vmwa;
+}
+
+/*
+ * Destroy per ethernet device VLAN VM workaround context
+ *
+ * Deletes every VLAN device still recorded in the context, closes
+ * the Netlink socket and frees the context memory.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_vlan_vmwa_init(),
+ *   NULL is accepted and ignored.
+ */
+void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *vmwa)
+{
+	unsigned int i;
+
+	/* mlx5_vlan_vmwa_init() may return NULL, tolerate it here. */
+	if (!vmwa)
+		return;
+	/* Delete all remaining VLAN devices. */
+	for (i = 0; i < RTE_DIM(vmwa->vlan_dev); i++) {
+		if (vmwa->vlan_dev[i].ifindex)
+			mlx5_vlan_vmwa_delete(vmwa, vmwa->vlan_dev[i].ifindex);
+	}
+	if (vmwa->nl_socket >= 0)
+		close(vmwa->nl_socket);
+	rte_free(vmwa);
+}