X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_nl.c;h=e7ba03471d04786684322ba30ba7016b6bc459d1;hb=c3e33304a7f6;hp=8a10109f9b5a8f8687c0dc1a3ae60efb0360b7a2;hpb=b2f3a38101255ac6970eb055831588091bd7aba2;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index 8a10109f9b..e7ba03471d 100644 --- a/drivers/net/mlx5/mlx5_nl.c +++ b/drivers/net/mlx5/mlx5_nl.c @@ -12,11 +12,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include "mlx5.h" #include "mlx5_utils.h" @@ -28,6 +31,8 @@ /* Receive buffer size for the Netlink socket */ #define MLX5_RECV_BUF_SIZE 32768 +/** Parameters of VLAN devices created by driver. */ +#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" /* * Define NDA_RTA as defined in iproute2 sources. * @@ -37,7 +42,15 @@ #define MLX5_NDA_RTA(r) \ ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) #endif - +/* + * Define NLMSG_TAIL as defined in iproute2 sources. + * + * see in iproute2 sources file include/libnetlink.h + */ +#ifndef NLMSG_TAIL +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) +#endif /* * The following definitions are normally found in rdma/rdma_netlink.h, * however they are so recent that most systems do not expose them yet. @@ -65,6 +78,12 @@ #endif /* These are normally found in linux/if_link.h. */ +#ifndef HAVE_IFLA_NUM_VF +#define IFLA_NUM_VF 21 +#endif +#ifndef HAVE_IFLA_EXT_MASK +#define IFLA_EXT_MASK 29 +#endif #ifndef HAVE_IFLA_PHYS_SWITCH_ID #define IFLA_PHYS_SWITCH_ID 36 #endif @@ -74,16 +93,23 @@ /* Add/remove MAC address through Netlink */ struct mlx5_nl_mac_addr { - struct ether_addr (*mac)[]; + struct rte_ether_addr (*mac)[]; /**< MAC address handled by the device. */ int mac_n; /**< Number of addresses in the array. */ }; -/** Data structure used by mlx5_nl_ifindex_cb(). */ +#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) +#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) +#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) +#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) + +/** Data structure used by mlx5_nl_cmdget_cb(). */ struct mlx5_nl_ifindex_data { const char *name; /**< IB device name (in). */ + uint32_t flags; /**< found attribute flags (out). */ uint32_t ibindex; /**< IB device index (out). */ uint32_t ifindex; /**< Network interface index (out). */ + uint32_t portnum; /**< IB device max port number (out). */ }; /** @@ -333,11 +359,11 @@ mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) #ifndef NDEBUG char m[18]; - ether_format_addr(m, 18, RTA_DATA(attribute)); + rte_ether_format_addr(m, 18, RTA_DATA(attribute)); DRV_LOG(DEBUG, "bridge MAC address %s", m); #endif memcpy(&(*data->mac)[data->mac_n++], - RTA_DATA(attribute), ETHER_ADDR_LEN); + RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); } } return 0; @@ -358,7 +384,7 @@ mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_nl_mac_addr_list(struct rte_eth_dev *dev, struct ether_addr (*mac)[], +mlx5_nl_mac_addr_list(struct rte_eth_dev *dev, struct rte_ether_addr (*mac)[], int *mac_n) { struct mlx5_priv *priv = dev->data->dev_private; @@ -417,7 +443,7 @@ error: * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac, +mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct rte_ether_addr *mac, int add) { struct mlx5_priv *priv = dev->data->dev_private; @@ -426,7 +452,7 @@ mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac, struct nlmsghdr hdr; struct ndmsg ndm; struct rtattr rta; - uint8_t buffer[ETHER_ADDR_LEN]; + uint8_t buffer[RTE_ETHER_ADDR_LEN]; } req = { .hdr = { .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), @@ -442,7 +468,7 @@ mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac, }, .rta = { .rta_type = NDA_LLADDR, - .rta_len = RTA_LENGTH(ETHER_ADDR_LEN), + .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), }, }; int fd; @@ -452,7 +478,7 @@ mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac, if (priv->nl_socket_route == -1) return 0; fd = priv->nl_socket_route; - memcpy(RTA_DATA(&req.rta), mac, ETHER_ADDR_LEN); + memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_ALIGN(req.rta.rta_len); ret = mlx5_nl_send(fd, &req.hdr, sn); @@ -475,6 +501,94 @@ error: return -rte_errno; } +/** + * Modify the VF MAC address neighbour table with Netlink. + * + * @param dev + * Pointer to Ethernet device. + * @param mac + * MAC address to consider. + * @param vf_index + * VF index. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_nl_vf_mac_addr_modify(struct rte_eth_dev *dev, + struct rte_ether_addr *mac, int vf_index) +{ + int fd, ret; + struct mlx5_priv *priv = dev->data->dev_private; + unsigned int iface_idx = mlx5_ifindex(dev); + struct { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr vf_list_rta; + struct rtattr vf_info_rta; + struct rtattr vf_mac_rta; + struct ifla_vf_mac ivm; + } req = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_type = RTM_BASE, + }, + .ifm = { + .ifi_index = iface_idx, + }, + .vf_list_rta = { + .rta_type = IFLA_VFINFO_LIST, + .rta_len = RTA_ALIGN(RTA_LENGTH(0)), + }, + .vf_info_rta = { + .rta_type = IFLA_VF_INFO, + .rta_len = RTA_ALIGN(RTA_LENGTH(0)), + }, + .vf_mac_rta = { + .rta_type = IFLA_VF_MAC, + }, + }; + uint32_t sn = priv->nl_sn++; + struct ifla_vf_mac ivm = { + .vf = vf_index, + }; + + memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); + memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); + + req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + + RTA_ALIGN(req.vf_list_rta.rta_len) + + RTA_ALIGN(req.vf_info_rta.rta_len) + + RTA_ALIGN(req.vf_mac_rta.rta_len); + req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), + &req.vf_list_rta); + req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), + &req.vf_info_rta); + + fd = priv->nl_socket_route; + if (fd < 0) + return -1; + ret = mlx5_nl_send(fd, &req.hdr, sn); + if (ret < 0) + goto error; + ret = mlx5_nl_recv(fd, sn, NULL, NULL); + if (ret < 0) + goto error; + return 0; +error: + DRV_LOG(ERR, + "representor %u cannot set VF MAC address " + "%02X:%02X:%02X:%02X:%02X:%02X : %s", + vf_index, + mac->addr_bytes[0], mac->addr_bytes[1], + mac->addr_bytes[2], mac->addr_bytes[3], + mac->addr_bytes[4], mac->addr_bytes[5], + strerror(rte_errno)); + return -rte_errno; +} + /** * Add a MAC address. * @@ -489,7 +603,7 @@ error: * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_nl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, +mlx5_nl_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac, uint32_t index) { struct mlx5_priv *priv = dev->data->dev_private; @@ -517,7 +631,7 @@ mlx5_nl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct ether_addr *mac, +mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct rte_ether_addr *mac, uint32_t index) { struct mlx5_priv *priv = dev->data->dev_private; @@ -535,7 +649,7 @@ mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct ether_addr *mac, void mlx5_nl_mac_addr_sync(struct rte_eth_dev *dev) { - struct ether_addr macs[MLX5_MAX_MAC_ADDRESSES]; + struct rte_ether_addr macs[MLX5_MAX_MAC_ADDRESSES]; int macs_n = 0; int i; int ret; @@ -548,14 +662,14 @@ mlx5_nl_mac_addr_sync(struct rte_eth_dev *dev) /* Verify the address is not in the array yet. */ for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j) - if (is_same_ether_addr(&macs[i], + if (rte_is_same_ether_addr(&macs[i], &dev->data->mac_addrs[j])) break; if (j != MLX5_MAX_MAC_ADDRESSES) continue; /* Find the first entry available. */ for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j) { - if (is_zero_ether_addr(&dev->data->mac_addrs[j])) { + if (rte_is_zero_ether_addr(&dev->data->mac_addrs[j])) { dev->data->mac_addrs[j] = macs[i]; break; } @@ -576,7 +690,7 @@ mlx5_nl_mac_addr_flush(struct rte_eth_dev *dev) int i; for (i = MLX5_MAX_MAC_ADDRESSES - 1; i >= 0; --i) { - struct ether_addr *m = &dev->data->mac_addrs[i]; + struct rte_ether_addr *m = &dev->data->mac_addrs[i]; if (BITFIELD_ISSET(priv->mac_own, i)) mlx5_nl_mac_addr_remove(dev, m, i); @@ -689,13 +803,13 @@ mlx5_nl_allmulti(struct rte_eth_dev *dev, int enable) * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_nl_ifindex_cb(struct nlmsghdr *nh, void *arg) +mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) { struct mlx5_nl_ifindex_data *data = arg; + struct mlx5_nl_ifindex_data local = { + .flags = 0, + }; size_t off = NLMSG_HDRLEN; - uint32_t ibindex = 0; - uint32_t ifindex = 0; - int found = 0; if (nh->nlmsg_type != RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && @@ -710,23 +824,37 @@ mlx5_nl_ifindex_cb(struct nlmsghdr *nh, void *arg) goto error; switch (na->nla_type) { case RDMA_NLDEV_ATTR_DEV_INDEX: - ibindex = *(uint32_t *)payload; + local.ibindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_IB_INDEX; break; case RDMA_NLDEV_ATTR_DEV_NAME: if (!strcmp(payload, data->name)) - found = 1; + local.flags |= MLX5_NL_CMD_GET_IB_NAME; break; case RDMA_NLDEV_ATTR_NDEV_INDEX: - ifindex = *(uint32_t *)payload; + local.ifindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_NET_INDEX; + break; + case RDMA_NLDEV_ATTR_PORT_INDEX: + local.portnum = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; break; default: break; } off += NLA_ALIGN(na->nla_len); } - if (found) { - data->ibindex = ibindex; - data->ifindex = ifindex; + /* + * It is possible to have multiple messages for all + * Infiniband devices in the system with appropriate name. + * So we should gather parameters locally and copy to + * query context only in case of coinciding device name. + */ + if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { + data->flags = local.flags; + data->ibindex = local.ibindex; + data->ifindex = local.ifindex; + data->portnum = local.portnum; } return 0; error: @@ -745,18 +873,19 @@ error: * Netlink socket of the RDMA kind (NETLINK_RDMA). * @param[in] name * IB device name. - * + * @param[in] pindex + * IB device port index, starting from 1 * @return * A valid (nonzero) interface index on success, 0 otherwise and rte_errno * is set. */ unsigned int -mlx5_nl_ifindex(int nl, const char *name) +mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) { - static const uint32_t pindex = 1; uint32_t seq = random(); struct mlx5_nl_ifindex_data data = { .name = name, + .flags = 0, .ibindex = 0, /* Determined during first pass. */ .ifindex = 0, /* Determined during second pass. */ }; @@ -779,11 +908,13 @@ mlx5_nl_ifindex(int nl, const char *name) ret = mlx5_nl_send(nl, &req.nh, seq); if (ret < 0) return 0; - ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data); + ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); if (ret < 0) return 0; - if (!data.ibindex) + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) goto error; + data.flags = 0; ++seq; req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET); @@ -802,10 +933,13 @@ mlx5_nl_ifindex(int nl, const char *name) ret = mlx5_nl_send(nl, &req.nh, seq); if (ret < 0) return 0; - ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data); + ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); if (ret < 0) return 0; - if (!data.ifindex) + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || + !data.ifindex) goto error; return data.ifindex; error: @@ -813,6 +947,53 @@ error: return 0; } +/** + * Get the number of physical ports of given IB device. + * + * @param nl + * Netlink socket of the RDMA kind (NETLINK_RDMA). + * @param[in] name + * IB device name. + * + * @return + * A valid (nonzero) number of ports on success, 0 otherwise + * and rte_errno is set. + */ +unsigned int +mlx5_nl_portnum(int nl, const char *name) +{ + uint32_t seq = random(); + struct mlx5_nl_ifindex_data data = { + .flags = 0, + .name = name, + .ifindex = 0, + .portnum = 0, + }; + struct nlmsghdr req = { + .nlmsg_len = NLMSG_LENGTH(0), + .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_GET), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + }; + int ret; + + ret = mlx5_nl_send(nl, &req, seq); + if (ret < 0) + return 0; + ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data); + if (ret < 0) + return 0; + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { + rte_errno = ENODEV; + return 0; + } + if (!data.portnum) + rte_errno = EINVAL; + return data.portnum; +} + /** * Process switch information from Netlink message. * @@ -830,13 +1011,13 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) struct mlx5_switch_info info = { .master = 0, .representor = 0, - .port_name_new = 0, + .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, .port_name = 0, .switch_id = 0, }; size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - bool port_name_set = false; bool switch_id_set = false; + bool num_vf_set = false; if (nh->nlmsg_type != RTM_NEWLINK) goto error; @@ -848,10 +1029,11 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) if (ra->rta_len > nh->nlmsg_len - off) goto error; switch (ra->rta_type) { + case IFLA_NUM_VF: + num_vf_set = true; + break; case IFLA_PHYS_PORT_NAME: - port_name_set = - mlx5_translate_port_name((char *)payload, - &info); + mlx5_translate_port_name((char *)payload, &info); break; case IFLA_PHYS_SWITCH_ID: info.switch_id = 0; @@ -864,8 +1046,11 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) } off += RTA_ALIGN(ra->rta_len); } - info.master = switch_id_set && !port_name_set; - info.representor = switch_id_set && port_name_set; + if (switch_id_set) { + /* We have some E-Switch configuration. */ + mlx5_nl_check_switch_info(num_vf_set, &info); + } + assert(!(info.master && info.representor)); memcpy(arg, &info, sizeof(info)); return 0; error: @@ -893,9 +1078,13 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) struct { struct nlmsghdr nh; struct ifinfomsg info; + struct rtattr rta; + uint32_t extmask; } req = { .nh = { - .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_len = NLMSG_LENGTH + (sizeof(req.info) + + RTA_LENGTH(sizeof(uint32_t))), .nlmsg_type = RTM_GETLINK, .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, }, @@ -903,11 +1092,311 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) .ifi_family = AF_UNSPEC, .ifi_index = ifindex, }, + .rta = { + .rta_type = IFLA_EXT_MASK, + .rta_len = RTA_LENGTH(sizeof(int32_t)), + }, + .extmask = RTE_LE32(1), }; int ret; ret = mlx5_nl_send(nl, &req.nh, seq); if (ret >= 0) ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info); + if (info->master && info->representor) { + DRV_LOG(ERR, "ifindex %u device is recognized as master" + " and as representor", ifindex); + rte_errno = ENODEV; + ret = -rte_errno; + } + return ret; +} + +/* + * Delete VLAN network device by ifindex. + * + * @param[in] tcf + * Context object initialized by mlx5_vlan_vmwa_init(). + * @param[in] ifindex + * Interface index of network device to delete. + */ +static void +mlx5_vlan_vmwa_delete(struct mlx5_vlan_vmwa_context *vmwa, + uint32_t ifindex) +{ + int ret; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + } req = { + .nh = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_type = RTM_DELLINK, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, + .info = { + .ifi_family = AF_UNSPEC, + .ifi_index = ifindex, + }, + }; + + if (ifindex) { + ++vmwa->nl_sn; + if (!vmwa->nl_sn) + ++vmwa->nl_sn; + ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, vmwa->nl_sn); + if (ret >= 0) + ret = mlx5_nl_recv(vmwa->nl_socket, + vmwa->nl_sn, + NULL, NULL); + if (ret < 0) + DRV_LOG(WARNING, "netlink: error deleting" + " VLAN WA ifindex %u, %d", + ifindex, ret); + } +} + +/* Set of subroutines to build Netlink message. */ +static struct nlattr * +nl_msg_tail(struct nlmsghdr *nlh) +{ + return (struct nlattr *) + (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); +} + +static void +nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) +{ + struct nlattr *nla = nl_msg_tail(nlh); + + nla->nla_type = type; + nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen); + nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len; + + if (alen) + memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); +} + +static struct nlattr * +nl_attr_nest_start(struct nlmsghdr *nlh, int type) +{ + struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); + + nl_attr_put(nlh, type, NULL, 0); + return nest; +} + +static void +nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) +{ + nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; +} + +/* + * Create network VLAN device with specified VLAN tag. + * + * @param[in] tcf + * Context object initialized by mlx5_vlan_vmwa_init(). + * @param[in] ifindex + * Base network interface index. + * @param[in] tag + * VLAN tag for VLAN network device to create. + */ +static uint32_t +mlx5_vlan_vmwa_create(struct mlx5_vlan_vmwa_context *vmwa, + uint32_t ifindex, + uint16_t tag) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; + + alignas(RTE_CACHE_LINE_SIZE) + uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + + NLMSG_ALIGN(sizeof(uint32_t)) + + NLMSG_ALIGN(sizeof(name)) + + NLMSG_ALIGN(sizeof("vlan")) + + NLMSG_ALIGN(sizeof(uint32_t)) + + NLMSG_ALIGN(sizeof(uint16_t)) + 16]; + struct nlattr *na_info; + struct nlattr *na_vlan; + int ret; + + memset(buf, 0, sizeof(buf)); + ++vmwa->nl_sn; + if (!vmwa->nl_sn) + ++vmwa->nl_sn; + nlh = (struct nlmsghdr *)buf; + nlh->nlmsg_len = sizeof(struct nlmsghdr); + nlh->nlmsg_type = RTM_NEWLINK; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_EXCL | NLM_F_ACK; + ifm = (struct ifinfomsg *)nl_msg_tail(nlh); + nlh->nlmsg_len += sizeof(struct ifinfomsg); + ifm->ifi_family = AF_UNSPEC; + ifm->ifi_type = 0; + ifm->ifi_index = 0; + ifm->ifi_flags = IFF_UP; + ifm->ifi_change = 0xffffffff; + nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); + ret = snprintf(name, sizeof(name), "%s.%u.%u", + MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); + nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); + na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); + nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); + na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); + nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); + nl_attr_nest_end(nlh, na_vlan); + nl_attr_nest_end(nlh, na_info); + assert(sizeof(buf) >= nlh->nlmsg_len); + ret = mlx5_nl_send(vmwa->nl_socket, nlh, vmwa->nl_sn); + if (ret >= 0) + ret = mlx5_nl_recv(vmwa->nl_socket, vmwa->nl_sn, NULL, NULL); + if (ret < 0) { + DRV_LOG(WARNING, + "netlink: VLAN %s create failure (%d)", + name, ret); + } + // Try to get ifindex of created or pre-existing device. + ret = if_nametoindex(name); + if (!ret) { + DRV_LOG(WARNING, + "VLAN %s failed to get index (%d)", + name, errno); + return 0; + } return ret; } + +/* + * Release VLAN network device, created for VM workaround. + * + * @param[in] dev + * Ethernet device object, Netlink context provider. + * @param[in] vlan + * Object representing the network device to release. + */ +void mlx5_vlan_vmwa_release(struct rte_eth_dev *dev, + struct mlx5_vf_vlan *vlan) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context; + struct mlx5_vlan_dev *vlan_dev = &vmwa->vlan_dev[0]; + + assert(vlan->created); + assert(priv->vmwa_context); + if (!vlan->created || !vmwa) + return; + vlan->created = 0; + assert(vlan_dev[vlan->tag].refcnt); + if (--vlan_dev[vlan->tag].refcnt == 0 && + vlan_dev[vlan->tag].ifindex) { + mlx5_vlan_vmwa_delete(vmwa, vlan_dev[vlan->tag].ifindex); + vlan_dev[vlan->tag].ifindex = 0; + } +} + +/** + * Acquire VLAN interface with specified tag for VM workaround. + * + * @param[in] dev + * Ethernet device object, Netlink context provider. + * @param[in] vlan + * Object representing the network device to acquire. + */ +void mlx5_vlan_vmwa_acquire(struct rte_eth_dev *dev, + struct mlx5_vf_vlan *vlan) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_vlan_vmwa_context *vmwa = priv->vmwa_context; + struct mlx5_vlan_dev *vlan_dev = &vmwa->vlan_dev[0]; + + assert(!vlan->created); + assert(priv->vmwa_context); + if (vlan->created || !vmwa) + return; + if (vlan_dev[vlan->tag].refcnt == 0) { + assert(!vlan_dev[vlan->tag].ifindex); + vlan_dev[vlan->tag].ifindex = + mlx5_vlan_vmwa_create(vmwa, + vmwa->vf_ifindex, + vlan->tag); + } + if (vlan_dev[vlan->tag].ifindex) { + vlan_dev[vlan->tag].refcnt++; + vlan->created = 1; + } +} + +/* + * Create per ethernet device VLAN VM workaround context + */ +struct mlx5_vlan_vmwa_context * +mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, + uint32_t ifindex) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_config *config = &priv->config; + struct mlx5_vlan_vmwa_context *vmwa; + enum rte_hypervisor hv_type; + + /* Do not engage workaround over PF. */ + if (!config->vf) + return NULL; + /* Check whether there is desired virtual environment */ + hv_type = rte_hypervisor_get(); + switch (hv_type) { + case RTE_HYPERVISOR_UNKNOWN: + case RTE_HYPERVISOR_VMWARE: + /* + * The "white list" of configurations + * to engage the workaround. + */ + break; + default: + /* + * The configuration is not found in the "white list". + * We should not engage the VLAN workaround. + */ + return NULL; + } + vmwa = rte_zmalloc(__func__, sizeof(*vmwa), sizeof(uint32_t)); + if (!vmwa) { + DRV_LOG(WARNING, + "Can not allocate memory" + " for VLAN workaround context"); + return NULL; + } + vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE); + if (vmwa->nl_socket < 0) { + DRV_LOG(WARNING, + "Can not create Netlink socket" + " for VLAN workaround context"); + rte_free(vmwa); + return NULL; + } + vmwa->nl_sn = random(); + vmwa->vf_ifindex = ifindex; + vmwa->dev = dev; + /* Cleanup for existing VLAN devices. */ + return vmwa; +} + +/* + * Destroy per ethernet device VLAN VM workaround context + */ +void mlx5_vlan_vmwa_exit(struct mlx5_vlan_vmwa_context *vmwa) +{ + unsigned int i; + + /* Delete all remaining VLAN devices. */ + for (i = 0; i < RTE_DIM(vmwa->vlan_dev); i++) { + if (vmwa->vlan_dev[i].ifindex) + mlx5_vlan_vmwa_delete(vmwa, vmwa->vlan_dev[i].ifindex); + } + if (vmwa->nl_socket >= 0) + close(vmwa->nl_socket); + rte_free(vmwa); +}