From: Matan Azrad Date: Wed, 29 Jan 2020 12:38:49 +0000 (+0000) Subject: common/mlx5: share Netlink commands X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=654810b56828eb5138a86db395e5cafcef3d70dd;p=dpdk.git common/mlx5: share Netlink commands Move Netlink mechanism and its dependencies from net/mlx5 to common/mlx5 in order to be ready to use by other mlx5 drivers. The dependencies are BITFIELD defines, the ppc64 compilation workaround for bool type and the function mlx5_translate_port_name. Update build mechanism accordingly. Signed-off-by: Matan Azrad Acked-by: Viacheslav Ovsiienko --- diff --git a/drivers/common/mlx5/Makefile b/drivers/common/mlx5/Makefile index b9e9803167..6a14b7d695 100644 --- a/drivers/common/mlx5/Makefile +++ b/drivers/common/mlx5/Makefile @@ -15,6 +15,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c endif SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_common.c +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y) INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) @@ -41,7 +42,7 @@ else LDLIBS += -libverbs -lmlx5 endif -LDLIBS += -lrte_eal -lrte_pci -lrte_kvargs +LDLIBS += -lrte_eal -lrte_pci -lrte_kvargs -lrte_net # A few warnings cannot be avoided in external headers. 
CFLAGS += -Wno-error=cast-qual -DNDEBUG -UPEDANTIC diff --git a/drivers/common/mlx5/meson.build b/drivers/common/mlx5/meson.build index b88822ef00..34cb7b9e0d 100644 --- a/drivers/common/mlx5/meson.build +++ b/drivers/common/mlx5/meson.build @@ -42,6 +42,7 @@ if build sources = files( 'mlx5_devx_cmds.c', 'mlx5_common.c', + 'mlx5_nl.c', ) if not pmd_dlopen sources += files('mlx5_glue.c') diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c index 57d72b4af9..99d15cdd9f 100644 --- a/drivers/common/mlx5/mlx5_common.c +++ b/drivers/common/mlx5/mlx5_common.c @@ -105,6 +105,61 @@ mlx5_class_get(struct rte_devargs *devargs) return ret; } +/** + * Extract port name, as a number, from sysfs or netlink information. + * + * @param[in] port_name_in + * String representing the port name. + * @param[out] port_info_out + * Port information, including port name as a number and port name + * type if recognized + * + * @return + * port_name field set according to recognized name format. + */ +void +mlx5_translate_port_name(const char *port_name_in, + struct mlx5_switch_info *port_info_out) +{ + char pf_c1, pf_c2, vf_c1, vf_c2; + char *end; + int sc_items; + + /* + * Check for port-name as a string of the form pf0vf0 + * (support kernel ver >= 5.0 or OFED ver >= 4.6). + */ + sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", + &pf_c1, &pf_c2, &port_info_out->pf_num, + &vf_c1, &vf_c2, &port_info_out->port_name); + if (sc_items == 6 && + pf_c1 == 'p' && pf_c2 == 'f' && + vf_c1 == 'v' && vf_c2 == 'f') { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; + return; + } + /* + * Check for port-name as a string of the form p0 + * (support kernel ver >= 5.0, or OFED ver >= 4.6). 
+ */ + sc_items = sscanf(port_name_in, "%c%d", + &pf_c1, &port_info_out->port_name); + if (sc_items == 2 && pf_c1 == 'p') { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; + return; + } + /* Check for port-name as a number (support kernel ver < 5.0). */ + errno = 0; + port_info_out->port_name = strtol(port_name_in, &end, 0); + if (!errno && + (size_t)(end - port_name_in) == strlen(port_name_in)) { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; + return; + } + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; + return; +} + #ifdef RTE_IBVERBS_LINK_DLOPEN /** diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h index 2988f4b26a..d9c2d262a0 100644 --- a/drivers/common/mlx5/mlx5_common.h +++ b/drivers/common/mlx5/mlx5_common.h @@ -17,6 +17,35 @@ #include "mlx5_prm.h" +/* + * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11. + * Otherwise there would be a type conflict between stdbool and altivec. + */ +#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__) +#undef bool +/* redefine as in stdbool.h */ +#define bool _Bool +#endif + +/* Bit-field manipulation. 
*/ +#define BITFIELD_DECLARE(bf, type, size) \ + type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \ + !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))] +#define BITFIELD_DEFINE(bf, type, size) \ + BITFIELD_DECLARE((bf), type, (size)) = { 0 } +#define BITFIELD_SET(bf, b) \ + (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \ + (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \ + ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))) +#define BITFIELD_RESET(bf, b) \ + (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \ + (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \ + ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))) +#define BITFIELD_ISSET(bf, b) \ + (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \ + !!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \ + ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))) + /* * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant * manner. @@ -112,6 +141,33 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF = 0x101e, }; +/* Maximum number of simultaneous unicast MAC addresses. */ +#define MLX5_MAX_UC_MAC_ADDRESSES 128 +/* Maximum number of simultaneous multicast MAC addresses. */ +#define MLX5_MAX_MC_MAC_ADDRESSES 128 +/* Maximum number of simultaneous MAC addresses. */ +#define MLX5_MAX_MAC_ADDRESSES \ + (MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES) + +/* Recognized Infiniband device physical port name types. */ +enum mlx5_nl_phys_port_name_type { + MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ + MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* Plain number, kernel ver < 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ +}; + +/** Switch information returned by mlx5_nl_switch_info(). */ +struct mlx5_switch_info { + uint32_t master:1; /**< Master device. */ + uint32_t representor:1; /**< Representor device. 
*/ + enum mlx5_nl_phys_port_name_type name_type; /**< Port name type. */ + int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ + int32_t port_name; /**< Representor port name. */ + uint64_t switch_id; /**< Switch identifier. */ +}; + /* CQE status. */ enum mlx5_cqe_status { MLX5_CQE_STATUS_SW_OWN = -1, @@ -159,6 +215,9 @@ enum mlx5_class { MLX5_CLASS_VDPA, MLX5_CLASS_INVALID, }; + enum mlx5_class mlx5_class_get(struct rte_devargs *devargs); +void mlx5_translate_port_name(const char *port_name_in, + struct mlx5_switch_info *port_info_out); #endif /* RTE_PMD_MLX5_COMMON_H_ */ diff --git a/drivers/common/mlx5/mlx5_nl.c b/drivers/common/mlx5/mlx5_nl.c new file mode 100644 index 0000000000..31627436c9 --- /dev/null +++ b/drivers/common/mlx5/mlx5_nl.c @@ -0,0 +1,1336 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 6WIND S.A. + * Copyright 2018 Mellanox Technologies, Ltd + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mlx5_nl.h" +#include "mlx5_common_utils.h" + +/* Size of the buffer to receive kernel messages */ +#define MLX5_NL_BUF_SIZE (32 * 1024) +/* Send buffer size for the Netlink socket */ +#define MLX5_SEND_BUF_SIZE 32768 +/* Receive buffer size for the Netlink socket */ +#define MLX5_RECV_BUF_SIZE 32768 + +/** Parameters of VLAN devices created by driver. */ +#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" +/* + * Define NDA_RTA as defined in iproute2 sources. + * + * see in iproute2 sources file include/libnetlink.h + */ +#ifndef MLX5_NDA_RTA +#define MLX5_NDA_RTA(r) \ + ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#endif +/* + * Define NLMSG_TAIL as defined in iproute2 sources. 
+ * + * see in iproute2 sources file include/libnetlink.h + */ +#ifndef NLMSG_TAIL +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) +#endif +/* + * The following definitions are normally found in rdma/rdma_netlink.h, + * however they are so recent that most systems do not expose them yet. + */ +#ifndef HAVE_RDMA_NL_NLDEV +#define RDMA_NL_NLDEV 5 +#endif +#ifndef HAVE_RDMA_NLDEV_CMD_GET +#define RDMA_NLDEV_CMD_GET 1 +#endif +#ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET +#define RDMA_NLDEV_CMD_PORT_GET 5 +#endif +#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX +#define RDMA_NLDEV_ATTR_DEV_INDEX 1 +#endif +#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME +#define RDMA_NLDEV_ATTR_DEV_NAME 2 +#endif +#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX +#define RDMA_NLDEV_ATTR_PORT_INDEX 3 +#endif +#ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX +#define RDMA_NLDEV_ATTR_NDEV_INDEX 50 +#endif + +/* These are normally found in linux/if_link.h. */ +#ifndef HAVE_IFLA_NUM_VF +#define IFLA_NUM_VF 21 +#endif +#ifndef HAVE_IFLA_EXT_MASK +#define IFLA_EXT_MASK 29 +#endif +#ifndef HAVE_IFLA_PHYS_SWITCH_ID +#define IFLA_PHYS_SWITCH_ID 36 +#endif +#ifndef HAVE_IFLA_PHYS_PORT_NAME +#define IFLA_PHYS_PORT_NAME 38 +#endif + +/* Add/remove MAC address through Netlink */ +struct mlx5_nl_mac_addr { + struct rte_ether_addr (*mac)[]; + /**< MAC address handled by the device. */ + int mac_n; /**< Number of addresses in the array. */ +}; + +#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) +#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) +#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) +#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) + +/** Data structure used by mlx5_nl_cmdget_cb(). */ +struct mlx5_nl_ifindex_data { + const char *name; /**< IB device name (in). */ + uint32_t flags; /**< found attribute flags (out). */ + uint32_t ibindex; /**< IB device index (out). */ + uint32_t ifindex; /**< Network interface index (out). */ + uint32_t portnum; /**< IB device max port number (out). 
*/ +}; + +rte_atomic32_t atomic_sn = RTE_ATOMIC32_INIT(0); + +/* Generate Netlink sequence number. */ +#define MLX5_NL_SN_GENERATE ((uint32_t)rte_atomic32_add_return(&atomic_sn, 1)) + +/** + * Opens a Netlink socket. + * + * @param protocol + * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). + * + * @return + * A file descriptor on success, a negative errno value otherwise and + * rte_errno is set. + */ +int +mlx5_nl_init(int protocol) +{ + int fd; + int sndbuf_size = MLX5_SEND_BUF_SIZE; + int rcvbuf_size = MLX5_RECV_BUF_SIZE; + struct sockaddr_nl local = { + .nl_family = AF_NETLINK, + }; + int ret; + + fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); + if (fd == -1) { + rte_errno = errno; + return -rte_errno; + } + ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); + if (ret == -1) { + rte_errno = errno; + goto error; + } + ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); + if (ret == -1) { + rte_errno = errno; + goto error; + } + ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); + if (ret == -1) { + rte_errno = errno; + goto error; + } + return fd; +error: + close(fd); + return -rte_errno; +} + +/** + * Send a request message to the kernel on the Netlink socket. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] nh + * The Netlink message sent to the kernel. + * @param[in] sn + * Sequence number. + * @param[in] req + * Pointer to the request structure. + * @param[in] len + * Length of the request in bytes. + * + * @return + * The number of sent bytes on success, a negative errno value otherwise and + * rte_errno is set. 
+ */ +static int +mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, + int len) +{ + struct sockaddr_nl sa = { + .nl_family = AF_NETLINK, + }; + struct iovec iov[2] = { + { .iov_base = nh, .iov_len = sizeof(*nh), }, + { .iov_base = req, .iov_len = len, }, + }; + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = iov, + .msg_iovlen = 2, + }; + int send_bytes; + + nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ + nh->nlmsg_seq = sn; + send_bytes = sendmsg(nlsk_fd, &msg, 0); + if (send_bytes < 0) { + rte_errno = errno; + return -rte_errno; + } + return send_bytes; +} + +/** + * Send a message to the kernel on the Netlink socket. + * + * @param[in] nlsk_fd + * The Netlink socket file descriptor used for communication. + * @param[in] nh + * The Netlink message send to the kernel. + * @param[in] sn + * Sequence number. + * + * @return + * The number of sent bytes on success, a negative errno value otherwise and + * rte_errno is set. + */ +static int +mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) +{ + struct sockaddr_nl sa = { + .nl_family = AF_NETLINK, + }; + struct iovec iov = { + .iov_base = nh, + .iov_len = nh->nlmsg_len, + }; + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int send_bytes; + + nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ + nh->nlmsg_seq = sn; + send_bytes = sendmsg(nlsk_fd, &msg, 0); + if (send_bytes < 0) { + rte_errno = errno; + return -rte_errno; + } + return send_bytes; +} + +/** + * Receive a message from the kernel on the Netlink socket, following + * mlx5_nl_send(). + * + * @param[in] nlsk_fd + * The Netlink socket file descriptor used for communication. + * @param[in] sn + * Sequence number. + * @param[in] cb + * The callback function to call for each Netlink message received. + * @param[in, out] arg + * Custom arguments for the callback. 
+ * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + void *arg) +{ + struct sockaddr_nl sa; + char buf[MLX5_RECV_BUF_SIZE]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + /* One message at a time */ + .msg_iovlen = 1, + }; + int multipart = 0; + int ret = 0; + + do { + struct nlmsghdr *nh; + int recv_bytes = 0; + + do { + recv_bytes = recvmsg(nlsk_fd, &msg, 0); + if (recv_bytes == -1) { + rte_errno = errno; + return -rte_errno; + } + nh = (struct nlmsghdr *)buf; + } while (nh->nlmsg_seq != sn); + for (; + NLMSG_OK(nh, (unsigned int)recv_bytes); + nh = NLMSG_NEXT(nh, recv_bytes)) { + if (nh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err_data = NLMSG_DATA(nh); + + if (err_data->error < 0) { + rte_errno = -err_data->error; + return -rte_errno; + } + /* Ack message. */ + return 0; + } + /* Multi-part msgs and their trailing DONE message. */ + if (nh->nlmsg_flags & NLM_F_MULTI) { + if (nh->nlmsg_type == NLMSG_DONE) + return 0; + multipart = 1; + } + if (cb) { + ret = cb(nh, arg); + if (ret < 0) + return ret; + } + } + } while (multipart); + return ret; +} + +/** + * Parse Netlink message to retrieve the bridge MAC address. + * + * @param nh + * Pointer to Netlink Message Header. + * @param arg + * PMD data register with this callback. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) +{ + struct mlx5_nl_mac_addr *data = arg; + struct ndmsg *r = NLMSG_DATA(nh); + struct rtattr *attribute; + int len; + + len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); + for (attribute = MLX5_NDA_RTA(r); + RTA_OK(attribute, len); + attribute = RTA_NEXT(attribute, len)) { + if (attribute->rta_type == NDA_LLADDR) { + if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { + DRV_LOG(WARNING, + "not enough room to finalize the" + " request"); + rte_errno = ENOMEM; + return -rte_errno; + } +#ifndef NDEBUG + char m[18]; + + rte_ether_format_addr(m, 18, RTA_DATA(attribute)); + DRV_LOG(DEBUG, "bridge MAC address %s", m); +#endif + memcpy(&(*data->mac)[data->mac_n++], + RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); + } + } + return 0; +} + +/** + * Get bridge MAC addresses. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param[out] mac + * Pointer to the array table of MAC addresses to fill. + * Its size should be MLX5_MAX_MAC_ADDRESSES. + * @param[out] mac_n + * Number of entries filled in MAC array. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, + struct rte_ether_addr (*mac)[], int *mac_n) +{ + struct { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + } req = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_type = RTM_GETNEIGH, + .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + }, + .ifm = { + .ifi_family = PF_BRIDGE, + .ifi_index = iface_idx, + }, + }; + struct mlx5_nl_mac_addr data = { + .mac = mac, + .mac_n = 0, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + if (nlsk_fd == -1) + return 0; + ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, + sizeof(struct ifinfomsg)); + if (ret < 0) + goto error; + ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); + if (ret < 0) + goto error; + *mac_n = data.mac_n; + return 0; +error: + DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", + iface_idx, strerror(rte_errno)); + return -rte_errno; +} + +/** + * Modify the MAC address neighbour table with Netlink. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param mac + * MAC address to consider. + * @param add + * 1 to add the MAC address, 0 to remove the MAC address. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, + struct rte_ether_addr *mac, int add) +{ + struct { + struct nlmsghdr hdr; + struct ndmsg ndm; + struct rtattr rta; + uint8_t buffer[RTE_ETHER_ADDR_LEN]; + } req = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_EXCL | NLM_F_ACK, + .nlmsg_type = add ? 
RTM_NEWNEIGH : RTM_DELNEIGH, + }, + .ndm = { + .ndm_family = PF_BRIDGE, + .ndm_state = NUD_NOARP | NUD_PERMANENT, + .ndm_ifindex = iface_idx, + .ndm_flags = NTF_SELF, + }, + .rta = { + .rta_type = NDA_LLADDR, + .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), + }, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + if (nlsk_fd == -1) + return 0; + memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + + RTA_ALIGN(req.rta.rta_len); + ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); + if (ret < 0) + goto error; + ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); + if (ret < 0) + goto error; + return 0; +error: + DRV_LOG(DEBUG, + "Interface %u cannot %s MAC address" + " %02X:%02X:%02X:%02X:%02X:%02X %s", + iface_idx, + add ? "add" : "remove", + mac->addr_bytes[0], mac->addr_bytes[1], + mac->addr_bytes[2], mac->addr_bytes[3], + mac->addr_bytes[4], mac->addr_bytes[5], + strerror(rte_errno)); + return -rte_errno; +} + +/** + * Modify the VF MAC address neighbour table with Netlink. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param mac + * MAC address to consider. + * @param vf_index + * VF index. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, + struct rte_ether_addr *mac, int vf_index) +{ + int ret; + struct { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr vf_list_rta; + struct rtattr vf_info_rta; + struct rtattr vf_mac_rta; + struct ifla_vf_mac ivm; + } req = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_type = RTM_BASE, + }, + .ifm = { + .ifi_index = iface_idx, + }, + .vf_list_rta = { + .rta_type = IFLA_VFINFO_LIST, + .rta_len = RTA_ALIGN(RTA_LENGTH(0)), + }, + .vf_info_rta = { + .rta_type = IFLA_VF_INFO, + .rta_len = RTA_ALIGN(RTA_LENGTH(0)), + }, + .vf_mac_rta = { + .rta_type = IFLA_VF_MAC, + }, + }; + struct ifla_vf_mac ivm = { + .vf = vf_index, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + + memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); + memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); + + req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + + RTA_ALIGN(req.vf_list_rta.rta_len) + + RTA_ALIGN(req.vf_info_rta.rta_len) + + RTA_ALIGN(req.vf_mac_rta.rta_len); + req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), + &req.vf_list_rta); + req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), + &req.vf_info_rta); + + if (nlsk_fd < 0) + return -1; + ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); + if (ret < 0) + goto error; + ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); + if (ret < 0) + goto error; + return 0; +error: + DRV_LOG(ERR, + "representor %u cannot set VF MAC address " + "%02X:%02X:%02X:%02X:%02X:%02X : %s", + vf_index, + mac->addr_bytes[0], mac->addr_bytes[1], + mac->addr_bytes[2], mac->addr_bytes[3], + mac->addr_bytes[4], mac->addr_bytes[5], + strerror(rte_errno)); + return -rte_errno; +} + +/** + * Add a MAC address. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. 
+ * @param mac_own + * BITFIELD_DECLARE array to store the mac. + * @param mac + * MAC address to register. + * @param index + * MAC address index. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, + uint64_t *mac_own, struct rte_ether_addr *mac, + uint32_t index) +{ + int ret; + + ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); + if (!ret) + BITFIELD_SET(mac_own, index); + if (ret == -EEXIST) + return 0; + return ret; +} + +/** + * Remove a MAC address. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param mac_own + * BITFIELD_DECLARE array to store the mac. + * @param mac + * MAC address to remove. + * @param index + * MAC address index. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, + struct rte_ether_addr *mac, uint32_t index) +{ + BITFIELD_RESET(mac_own, index); + return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); +} + +/** + * Synchronize Netlink bridge table to the internal table. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param mac_addrs + * Mac addresses array to sync. + * @param n + * @p mac_addrs array size. + */ +void +mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, + struct rte_ether_addr *mac_addrs, int n) +{ + struct rte_ether_addr macs[n]; + int macs_n = 0; + int i; + int ret; + + ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); + if (ret) + return; + for (i = 0; i != macs_n; ++i) { + int j; + + /* Verify the address is not in the array yet. */ + for (j = 0; j != n; ++j) + if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) + break; + if (j != n) + continue; + /* Find the first entry available. 
*/ + for (j = 0; j != n; ++j) { + if (rte_is_zero_ether_addr(&mac_addrs[j])) { + mac_addrs[j] = macs[i]; + break; + } + } + } +} + +/** + * Flush all added MAC addresses. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param[in] mac_addrs + * Mac addresses array to flush. + * @param n + * @p mac_addrs array size. + * @param mac_own + * BITFIELD_DECLARE array to store the mac. + */ +void +mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, + struct rte_ether_addr *mac_addrs, int n, + uint64_t *mac_own) +{ + int i; + + for (i = n - 1; i >= 0; --i) { + struct rte_ether_addr *m = &mac_addrs[i]; + + if (BITFIELD_ISSET(mac_own, i)) + mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, + i); + } +} + +/** + * Enable promiscuous / all multicast mode through Netlink. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param flags + * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. + * @param enable + * Nonzero to enable, disable otherwise. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, + int enable) +{ + struct { + struct nlmsghdr hdr; + struct ifinfomsg ifi; + } req = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_type = RTM_NEWLINK, + .nlmsg_flags = NLM_F_REQUEST, + }, + .ifi = { + .ifi_flags = enable ? flags : 0, + .ifi_change = flags, + .ifi_index = iface_idx, + }, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + assert(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); + if (nlsk_fd < 0) + return 0; + ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); + if (ret < 0) + return ret; + return 0; +} + +/** + * Enable promiscuous mode through Netlink. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. 
+ * @param[in] iface_idx + * Net device interface index. + * @param enable + * Nonzero to enable, disable otherwise. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) +{ + int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); + + if (ret) + DRV_LOG(DEBUG, + "Interface %u cannot %s promisc mode: Netlink error %s", + iface_idx, enable ? "enable" : "disable", + strerror(rte_errno)); + return ret; +} + +/** + * Enable all multicast mode through Netlink. + * + * @param[in] nlsk_fd + * Netlink socket file descriptor. + * @param[in] iface_idx + * Net device interface index. + * @param enable + * Nonzero to enable, disable otherwise. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) +{ + int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, + enable); + + if (ret) + DRV_LOG(DEBUG, + "Interface %u cannot %s allmulti : Netlink error %s", + iface_idx, enable ? "enable" : "disable", + strerror(rte_errno)); + return ret; +} + +/** + * Process network interface information from Netlink message. + * + * @param nh + * Pointer to Netlink message header. + * @param arg + * Opaque data pointer for this callback. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) +{ + struct mlx5_nl_ifindex_data *data = arg; + struct mlx5_nl_ifindex_data local = { + .flags = 0, + }; + size_t off = NLMSG_HDRLEN; + + if (nh->nlmsg_type != + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && + nh->nlmsg_type != + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) + goto error; + while (off < nh->nlmsg_len) { + struct nlattr *na = (void *)((uintptr_t)nh + off); + void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); + + if (na->nla_len > nh->nlmsg_len - off) + goto error; + switch (na->nla_type) { + case RDMA_NLDEV_ATTR_DEV_INDEX: + local.ibindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_IB_INDEX; + break; + case RDMA_NLDEV_ATTR_DEV_NAME: + if (!strcmp(payload, data->name)) + local.flags |= MLX5_NL_CMD_GET_IB_NAME; + break; + case RDMA_NLDEV_ATTR_NDEV_INDEX: + local.ifindex = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_NET_INDEX; + break; + case RDMA_NLDEV_ATTR_PORT_INDEX: + local.portnum = *(uint32_t *)payload; + local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; + break; + default: + break; + } + off += NLA_ALIGN(na->nla_len); + } + /* + * It is possible to have multiple messages for all + * Infiniband devices in the system with appropriate name. + * So we should gather parameters locally and copy to + * query context only in case of coinciding device name. + */ + if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { + data->flags = local.flags; + data->ibindex = local.ibindex; + data->ifindex = local.ifindex; + data->portnum = local.portnum; + } + return 0; +error: + rte_errno = EINVAL; + return -rte_errno; +} + +/** + * Get index of network interface associated with some IB device. + * + * This is the only somewhat safe method to avoid resorting to heuristics + * when faced with port representors. Unfortunately it requires at least + * Linux 4.17. + * + * @param nl + * Netlink socket of the RDMA kind (NETLINK_RDMA). 
+ * @param[in] name + * IB device name. + * @param[in] pindex + * IB device port index, starting from 1 + * @return + * A valid (nonzero) interface index on success, 0 otherwise and rte_errno + * is set. + */ +unsigned int +mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) +{ + struct mlx5_nl_ifindex_data data = { + .name = name, + .flags = 0, + .ibindex = 0, /* Determined during first pass. */ + .ifindex = 0, /* Determined during second pass. */ + }; + union { + struct nlmsghdr nh; + uint8_t buf[NLMSG_HDRLEN + + NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + + NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; + } req = { + .nh = { + .nlmsg_len = NLMSG_LENGTH(0), + .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_GET), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + }, + }; + struct nlattr *na; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + ret = mlx5_nl_send(nl, &req.nh, sn); + if (ret < 0) + return 0; + ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); + if (ret < 0) + return 0; + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) + goto error; + data.flags = 0; + sn = MLX5_NL_SN_GENERATE; + req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_PORT_GET); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); + na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); + na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); + na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; + memcpy((void *)((uintptr_t)na + NLA_HDRLEN), + &data.ibindex, sizeof(data.ibindex)); + na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); + na->nla_len = NLA_HDRLEN + sizeof(pindex); + na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; + memcpy((void *)((uintptr_t)na + NLA_HDRLEN), + &pindex, sizeof(pindex)); + ret = mlx5_nl_send(nl, &req.nh, sn); + if (ret < 0) + return 0; + ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); + if (ret < 0) + return 0; + if 
(!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || + !data.ifindex) + goto error; + return data.ifindex; +error: + rte_errno = ENODEV; + return 0; +} + +/** + * Get the number of physical ports of given IB device. + * + * @param nl + * Netlink socket of the RDMA kind (NETLINK_RDMA). + * @param[in] name + * IB device name. + * + * @return + * A valid (nonzero) number of ports on success, 0 otherwise + * and rte_errno is set. + */ +unsigned int +mlx5_nl_portnum(int nl, const char *name) +{ + struct mlx5_nl_ifindex_data data = { + .flags = 0, + .name = name, + .ifindex = 0, + .portnum = 0, + }; + struct nlmsghdr req = { + .nlmsg_len = NLMSG_LENGTH(0), + .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_GET), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + }; + uint32_t sn = MLX5_NL_SN_GENERATE; + int ret; + + ret = mlx5_nl_send(nl, &req, sn); + if (ret < 0) + return 0; + ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); + if (ret < 0) + return 0; + if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || + !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || + !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { + rte_errno = ENODEV; + return 0; + } + if (!data.portnum) + rte_errno = EINVAL; + return data.portnum; +} + +/** + * Analyze gathered port parameters via Netlink to recognize master + * and representor devices for E-Switch configuration. + * + * @param[in] num_vf_set + * flag of presence of number of VFs port attribute. + * @param[inout] switch_info + * Port information, including port name as a number and port name + * type if recognized + * + * @return + * master and representor flags are set in switch_info according to + * recognized parameters (if any). 
+ */
+static void
+mlx5_nl_check_switch_info(bool num_vf_set,
+			  struct mlx5_switch_info *switch_info)
+{
+	switch (switch_info->name_type) {
+	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+		/* Name is not recognized: handled like a missing name. */
+	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+		/*
+		 * No usable port name, the legacy naming schema for
+		 * master is assumed: the device is the master when
+		 * the number of VFs key is present.
+		 */
+		switch_info->master = num_vf_set;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+		/*
+		 * Legacy representors naming schema: representor
+		 * unless the number of VFs key is present.
+		 */
+		switch_info->representor = !num_vf_set;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+		/* New uplink naming schema recognized. */
+		switch_info->master = 1;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* New representors naming schema. */
+		switch_info->representor = 1;
+		break;
+	}
+}
+
+/**
+ * Process switch information from Netlink message.
+ *
+ * @param nh
+ *   Pointer to Netlink message header.
+ * @param arg
+ *   Opaque data pointer for this callback.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
+{
+	struct mlx5_switch_info info = {
+		.master = 0,
+		.representor = 0,
+		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
+		.port_name = 0,
+		.switch_id = 0,
+	};
+	/* Attributes start right after the ifinfomsg payload. */
+	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	bool switch_id_set = false;
+	bool num_vf_set = false;
+
+	if (nh->nlmsg_type != RTM_NEWLINK)
+		goto error;
+	while (off < nh->nlmsg_len) {
+		struct rtattr *ra = (void *)((uintptr_t)nh + off);
+		size_t left = nh->nlmsg_len - off;
+		void *payload;
+		unsigned int i;
+
+		/*
+		 * Reject malformed attributes. A length shorter than the
+		 * attribute header would either never advance the offset
+		 * (endless loop) or make RTA_PAYLOAD() negative, a length
+		 * beyond the message end would read past the message.
+		 */
+		if (left < sizeof(*ra) ||
+		    ra->rta_len < sizeof(*ra) ||
+		    ra->rta_len > left)
+			goto error;
+		payload = RTA_DATA(ra);
+		switch (ra->rta_type) {
+		case IFLA_NUM_VF:
+			/* Only the presence of the key matters. */
+			num_vf_set = true;
+			break;
+		case IFLA_PHYS_PORT_NAME:
+			/*
+			 * NOTE(review): the payload is handed over as a C
+			 * string, presumably NUL-terminated by the kernel -
+			 * confirm.
+			 */
+			mlx5_translate_port_name((char *)payload, &info);
+			break;
+		case IFLA_PHYS_SWITCH_ID:
+			/* Fold the ID bytes into an integer, first byte highest. */
+			info.switch_id = 0;
+			for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
+				info.switch_id <<= 8;
+				info.switch_id |= ((uint8_t *)payload)[i];
+			}
+			switch_id_set = true;
+			break;
+		default:
+			/* Other attributes are of no interest here. */
+			break;
+		}
+		off += RTA_ALIGN(ra->rta_len);
+	}
+	if (switch_id_set) {
+		/* We have some E-Switch configuration. */
+		mlx5_nl_check_switch_info(num_vf_set, &info);
+	}
+	assert(!(info.master && info.representor));
+	/* The caller's object is written only after a complete parse. */
+	memcpy(arg, &info, sizeof(info));
+	return 0;
+error:
+	rte_errno = EINVAL;
+	return -rte_errno;
+}
+
+/**
+ * Get switch information associated with network interface.
+ *
+ * @param nl
+ *   Netlink socket of the ROUTE kind (NETLINK_ROUTE).
+ * @param ifindex
+ *   Network interface index.
+ * @param[out] info
+ *   Switch information object, populated in case of success.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_nl_switch_info(int nl, unsigned int ifindex,
+		    struct mlx5_switch_info *info)
+{
+	/* RTM_GETLINK request carrying a single IFLA_EXT_MASK attribute. */
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+		struct rtattr rta;
+		uint32_t extmask;
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH
+					(sizeof(req.info) +
+					 RTA_LENGTH(sizeof(uint32_t))),
+			.nlmsg_type = RTM_GETLINK,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+		.info = {
+			.ifi_family = AF_UNSPEC,
+			.ifi_index = ifindex,
+		},
+		.rta = {
+			.rta_type = IFLA_EXT_MASK,
+			/* Payload is the 32-bit extmask member below. */
+			.rta_len = RTA_LENGTH(sizeof(uint32_t)),
+		},
+		.extmask = RTE_LE32(1),
+	};
+	uint32_t sn = MLX5_NL_SN_GENERATE;
+	int ret;
+
+	ret = mlx5_nl_send(nl, &req.nh, sn);
+	if (ret >= 0)
+		ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
+	/*
+	 * The callback fills *info only when the exchange succeeds, so
+	 * the object is inspected only in that case; on failure it may
+	 * hold whatever the caller left in it.
+	 */
+	if (ret >= 0 && info->master && info->representor) {
+		DRV_LOG(ERR, "ifindex %u device is recognized as master"
+			     " and as representor", ifindex);
+		rte_errno = ENODEV;
+		ret = -rte_errno;
+	}
+	return ret;
+}
+
+/*
+ * Delete VLAN network device by ifindex.
+ *
+ * Nothing is sent when ifindex is zero. A failure is only logged,
+ * it is not reported to the caller.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_nl_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Interface index of network device to delete.
+ */
+void
+mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
+			 uint32_t ifindex)
+{
+	uint32_t sn = MLX5_NL_SN_GENERATE;
+	int ret;
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+			.nlmsg_type = RTM_DELLINK,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+		.info = {
+			.ifi_family = AF_UNSPEC,
+			.ifi_index = ifindex,
+		},
+	};
+
+	if (ifindex) {
+		ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
+		if (ret >= 0)
+			ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
+		if (ret < 0)
+			DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
+				" ifindex %u, %d", ifindex, ret);
+	}
+}
+
+/* Set of subroutines to build Netlink message.
*/
+/* Return the first aligned position past the current message end. */
+static struct nlattr *
+nl_msg_tail(struct nlmsghdr *nlh)
+{
+	uint8_t *base = (uint8_t *)nlh;
+
+	return (struct nlattr *)(base + NLMSG_ALIGN(nlh->nlmsg_len));
+}
+
+/*
+ * Append an attribute of the given type with alen payload bytes copied
+ * from data and grow the message length accordingly. The stored
+ * attribute length is the aligned one, padding included.
+ */
+static void
+nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
+{
+	struct nlattr *attr = nl_msg_tail(nlh);
+
+	attr->nla_type = type;
+	attr->nla_len = NLMSG_ALIGN(sizeof(*attr) + alen);
+	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + attr->nla_len;
+
+	/* The payload is stored right behind the attribute header. */
+	if (alen != 0)
+		memcpy(attr + 1, data, alen);
+}
+
+/*
+ * Open a nested attribute: an empty attribute is appended and its
+ * position is returned for the matching nl_attr_nest_end() call.
+ */
+static struct nlattr *
+nl_attr_nest_start(struct nlmsghdr *nlh, int type)
+{
+	struct nlattr *start = nl_msg_tail(nlh);
+
+	nl_attr_put(nlh, type, NULL, 0);
+	return start;
+}
+
+/* Close a nested attribute: its length spans up to the message tail. */
+static void
+nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
+{
+	uint8_t *tail = (uint8_t *)nl_msg_tail(nlh);
+
+	nest->nla_len = tail - (uint8_t *)nest;
+}
+
+/*
+ * Create network VLAN device with specified VLAN tag.
+ *
+ * @param[in] vmwa
+ *   Context object initialized by mlx5_nl_vlan_vmwa_init().
+ * @param[in] ifindex
+ *   Base network interface index.
+ * @param[in] tag
+ *   VLAN tag for VLAN network device to create.
+ *
+ * @return
+ *   Interface index of the created or pre-existing VLAN device,
+ *   0 if the index cannot be obtained.
+ */
+uint32_t
+mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
+			 uint32_t ifindex, uint16_t tag)
+{
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
+
+	/*
+	 * Room for the message header, the ifinfomsg, up to eight
+	 * attribute headers, every attribute payload and some slack.
+	 */
+	alignas(RTE_CACHE_LINE_SIZE)
+	uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		    NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
+		    NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(name)) +
+		    NLMSG_ALIGN(sizeof("vlan")) +
+		    NLMSG_ALIGN(sizeof(uint32_t)) +
+		    NLMSG_ALIGN(sizeof(uint16_t)) + 16];
+	struct nlattr *na_info;
+	struct nlattr *na_vlan;
+	uint32_t sn = MLX5_NL_SN_GENERATE;
+	int ret;
+
+	memset(buf, 0, sizeof(buf));
+	nlh = (struct nlmsghdr *)buf;
+	nlh->nlmsg_len = sizeof(struct nlmsghdr);
+	nlh->nlmsg_type = RTM_NEWLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_EXCL | NLM_F_ACK;
+	/* The ifinfomsg sits right behind the header. */
+	ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
+	nlh->nlmsg_len += sizeof(struct ifinfomsg);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_type = 0;
+	ifm->ifi_index = 0;
+	ifm->ifi_flags = IFF_UP;
+	ifm->ifi_change = 0xffffffff;
+	nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
+	/* Device name is "<prefix>.<base ifindex>.<tag>". */
+	ret = snprintf(name, sizeof(name), "%s.%u.%u",
+		       MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
+	/* The name attribute includes the terminating NUL. */
+	nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
+	/* IFLA_LINKINFO { IFLA_INFO_KIND, IFLA_INFO_DATA { IFLA_VLAN_ID } } */
+	na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
+	nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
+	na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
+	nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
+	nl_attr_nest_end(nlh, na_vlan);
+	nl_attr_nest_end(nlh, na_info);
+	assert(sizeof(buf) >= nlh->nlmsg_len);
+	ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
+	if (ret >= 0)
+		ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
+	/* A creation failure is only logged, the lookup below still runs. */
+	if (ret < 0) {
+		DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
+			ret);
+	}
+	/* Try to get ifindex of created or pre-existing device. */
+	ret = if_nametoindex(name);
+	if (!ret) {
+		DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
+			errno);
+		return 0;
+	}
+	return ret;
+}
diff --git a/drivers/common/mlx5/mlx5_nl.h b/drivers/common/mlx5/mlx5_nl.h
new file mode 100644
index 0000000000..8e66a98c66
--- /dev/null
+++ b/drivers/common/mlx5/mlx5_nl.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_NL_H_
+#define RTE_PMD_MLX5_NL_H_
+
+#include
+
+#include
+
+#include "mlx5_common.h"
+
+
+/* VLAN netdev for VLAN workaround. */
+struct mlx5_nl_vlan_dev {
+	uint32_t refcnt;
+	uint32_t ifindex; /**< Own interface index. */
+};
+
+/*
+ * Array of VLAN devices created on the base of VF
+ * used for workaround in virtual environments.
+ */
+struct mlx5_nl_vlan_vmwa_context {
+	int nl_socket;
+	uint32_t vf_ifindex;
+	struct mlx5_nl_vlan_dev vlan_dev[4096];
+};
+
+
+int mlx5_nl_init(int protocol);
+int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
+			 struct rte_ether_addr *mac, uint32_t index);
+int mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx,
+			    uint64_t *mac_own, struct rte_ether_addr *mac,
+			    uint32_t index);
+void mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
+			   struct rte_ether_addr *mac_addrs, int n);
+void mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
+			    struct rte_ether_addr *mac_addrs, int n,
+			    uint64_t *mac_own);
+int mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable);
+int mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable);
+unsigned int mlx5_nl_portnum(int nl, const char *name);
+unsigned int mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex);
+int mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
+			       struct rte_ether_addr *mac, int vf_index);
+int mlx5_nl_switch_info(int nl, unsigned int ifindex,
+			struct mlx5_switch_info *info);
+
+void mlx5_nl_vlan_vmwa_delete(struct
mlx5_nl_vlan_vmwa_context *vmwa, + uint32_t ifindex); +uint32_t mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, + uint32_t ifindex, uint16_t tag); + +#endif /* RTE_PMD_MLX5_NL_H_ */ diff --git a/drivers/common/mlx5/rte_common_mlx5_version.map b/drivers/common/mlx5/rte_common_mlx5_version.map index 3e7038bdc3..f93f5cb315 100644 --- a/drivers/common/mlx5/rte_common_mlx5_version.map +++ b/drivers/common/mlx5/rte_common_mlx5_version.map @@ -28,4 +28,20 @@ DPDK_20.02 { mlx5_devx_get_out_command_status; mlx5_dev_to_pci_addr; + + mlx5_nl_allmulti; + mlx5_nl_ifindex; + mlx5_nl_init; + mlx5_nl_mac_addr_add; + mlx5_nl_mac_addr_flush; + mlx5_nl_mac_addr_remove; + mlx5_nl_mac_addr_sync; + mlx5_nl_portnum; + mlx5_nl_promisc; + mlx5_nl_switch_info; + mlx5_nl_vf_mac_addr_modify; + mlx5_nl_vlan_vmwa_create; + mlx5_nl_vlan_vmwa_delete; + + mlx5_translate_port_name; }; diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index dc6b3c8c38..d26afbb5b1 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -30,7 +30,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_meter.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mp.c -SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_utils.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index e10ef3ab2d..d45be00a68 100644 --- a/drivers/net/mlx5/meson.build +++ b/drivers/net/mlx5/meson.build @@ -19,7 +19,6 @@ sources = files( 'mlx5_flow_verbs.c', 'mlx5_mac.c', 'mlx5_mr.c', - 'mlx5_nl.c', 'mlx5_rss.c', 'mlx5_rxmode.c', 'mlx5_rxq.c', diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 9864aa7342..a7e70895e1 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -35,11 +35,11 @@ #include #include #include +#include #include "mlx5_defs.h" 
#include "mlx5_utils.h" #include "mlx5_mr.h" -#include "mlx5_nl.h" #include "mlx5_autoconf.h" /* Request types for IPC. */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index dc9b965c37..9b392ed036 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -14,14 +14,6 @@ /* Reported driver name. */ #define MLX5_DRIVER_NAME "net_mlx5" -/* Maximum number of simultaneous unicast MAC addresses. */ -#define MLX5_MAX_UC_MAC_ADDRESSES 128 -/* Maximum number of simultaneous Multicast MAC addresses. */ -#define MLX5_MAX_MC_MAC_ADDRESSES 128 -/* Maximum number of simultaneous MAC addresses. */ -#define MLX5_MAX_MAC_ADDRESSES \ - (MLX5_MAX_UC_MAC_ADDRESSES + MLX5_MAX_MC_MAC_ADDRESSES) - /* Maximum number of simultaneous VLAN filters. */ #define MLX5_MAX_VLAN_IDS 128 diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 5484104cb6..b765636620 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1939,61 +1939,6 @@ mlx5_sysfs_check_switch_info(bool device_dir, } } -/** - * Extract port name, as a number, from sysfs or netlink information. - * - * @param[in] port_name_in - * String representing the port name. - * @param[out] port_info_out - * Port information, including port name as a number and port name - * type if recognized - * - * @return - * port_name field set according to recognized name format. - */ -void -mlx5_translate_port_name(const char *port_name_in, - struct mlx5_switch_info *port_info_out) -{ - char pf_c1, pf_c2, vf_c1, vf_c2; - char *end; - int sc_items; - - /* - * Check for port-name as a string of the form pf0vf0 - * (support kernel ver >= 5.0 or OFED ver >= 4.6). 
- */ - sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", - &pf_c1, &pf_c2, &port_info_out->pf_num, - &vf_c1, &vf_c2, &port_info_out->port_name); - if (sc_items == 6 && - pf_c1 == 'p' && pf_c2 == 'f' && - vf_c1 == 'v' && vf_c2 == 'f') { - port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; - return; - } - /* - * Check for port-name as a string of the form p0 - * (support kernel ver >= 5.0, or OFED ver >= 4.6). - */ - sc_items = sscanf(port_name_in, "%c%d", - &pf_c1, &port_info_out->port_name); - if (sc_items == 2 && pf_c1 == 'p') { - port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; - return; - } - /* Check for port-name as a number (support kernel ver < 5.0 */ - errno = 0; - port_info_out->port_name = strtol(port_name_in, &end, 0); - if (!errno && - (size_t)(end - port_name_in) == strlen(port_name_in)) { - port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; - return; - } - port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; - return; -} - /** * DPDK callback to retrieve plug-in module EEPROM information (type and size). * diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c deleted file mode 100644 index 6b8ca00418..0000000000 --- a/drivers/net/mlx5/mlx5_nl.c +++ /dev/null @@ -1,1338 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2018 6WIND S.A. - * Copyright 2018 Mellanox Technologies, Ltd - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mlx5.h" -#include "mlx5_nl.h" -#include "mlx5_utils.h" - -/* Size of the buffer to receive kernel messages */ -#define MLX5_NL_BUF_SIZE (32 * 1024) -/* Send buffer size for the Netlink socket */ -#define MLX5_SEND_BUF_SIZE 32768 -/* Receive buffer size for the Netlink socket */ -#define MLX5_RECV_BUF_SIZE 32768 - -/** Parameters of VLAN devices created by driver. 
*/ -#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" -/* - * Define NDA_RTA as defined in iproute2 sources. - * - * see in iproute2 sources file include/libnetlink.h - */ -#ifndef MLX5_NDA_RTA -#define MLX5_NDA_RTA(r) \ - ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) -#endif -/* - * Define NLMSG_TAIL as defined in iproute2 sources. - * - * see in iproute2 sources file include/libnetlink.h - */ -#ifndef NLMSG_TAIL -#define NLMSG_TAIL(nmsg) \ - ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) -#endif -/* - * The following definitions are normally found in rdma/rdma_netlink.h, - * however they are so recent that most systems do not expose them yet. - */ -#ifndef HAVE_RDMA_NL_NLDEV -#define RDMA_NL_NLDEV 5 -#endif -#ifndef HAVE_RDMA_NLDEV_CMD_GET -#define RDMA_NLDEV_CMD_GET 1 -#endif -#ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET -#define RDMA_NLDEV_CMD_PORT_GET 5 -#endif -#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX -#define RDMA_NLDEV_ATTR_DEV_INDEX 1 -#endif -#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME -#define RDMA_NLDEV_ATTR_DEV_NAME 2 -#endif -#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX -#define RDMA_NLDEV_ATTR_PORT_INDEX 3 -#endif -#ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX -#define RDMA_NLDEV_ATTR_NDEV_INDEX 50 -#endif - -/* These are normally found in linux/if_link.h. */ -#ifndef HAVE_IFLA_NUM_VF -#define IFLA_NUM_VF 21 -#endif -#ifndef HAVE_IFLA_EXT_MASK -#define IFLA_EXT_MASK 29 -#endif -#ifndef HAVE_IFLA_PHYS_SWITCH_ID -#define IFLA_PHYS_SWITCH_ID 36 -#endif -#ifndef HAVE_IFLA_PHYS_PORT_NAME -#define IFLA_PHYS_PORT_NAME 38 -#endif - -/* Add/remove MAC address through Netlink */ -struct mlx5_nl_mac_addr { - struct rte_ether_addr (*mac)[]; - /**< MAC address handled by the device. */ - int mac_n; /**< Number of addresses in the array. 
*/ -}; - -#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) -#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) -#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) -#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) - -/** Data structure used by mlx5_nl_cmdget_cb(). */ -struct mlx5_nl_ifindex_data { - const char *name; /**< IB device name (in). */ - uint32_t flags; /**< found attribute flags (out). */ - uint32_t ibindex; /**< IB device index (out). */ - uint32_t ifindex; /**< Network interface index (out). */ - uint32_t portnum; /**< IB device max port number (out). */ -}; - -rte_atomic32_t atomic_sn = RTE_ATOMIC32_INIT(0); - -/* Generate Netlink sequence number. */ -#define MLX5_NL_SN_GENERATE ((uint32_t)rte_atomic32_add_return(&atomic_sn, 1)) - -/** - * Opens a Netlink socket. - * - * @param protocol - * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). - * - * @return - * A file descriptor on success, a negative errno value otherwise and - * rte_errno is set. - */ -int -mlx5_nl_init(int protocol) -{ - int fd; - int sndbuf_size = MLX5_SEND_BUF_SIZE; - int rcvbuf_size = MLX5_RECV_BUF_SIZE; - struct sockaddr_nl local = { - .nl_family = AF_NETLINK, - }; - int ret; - - fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); - if (fd == -1) { - rte_errno = errno; - return -rte_errno; - } - ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); - if (ret == -1) { - rte_errno = errno; - goto error; - } - ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); - if (ret == -1) { - rte_errno = errno; - goto error; - } - ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); - if (ret == -1) { - rte_errno = errno; - goto error; - } - return fd; -error: - close(fd); - return -rte_errno; -} - -/** - * Send a request message to the kernel on the Netlink socket. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] nh - * The Netlink message send to the kernel. - * @param[in] ssn - * Sequence number. 
- * @param[in] req - * Pointer to the request structure. - * @param[in] len - * Length of the request in bytes. - * - * @return - * The number of sent bytes on success, a negative errno value otherwise and - * rte_errno is set. - */ -static int -mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req, - int len) -{ - struct sockaddr_nl sa = { - .nl_family = AF_NETLINK, - }; - struct iovec iov[2] = { - { .iov_base = nh, .iov_len = sizeof(*nh), }, - { .iov_base = req, .iov_len = len, }, - }; - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = iov, - .msg_iovlen = 2, - }; - int send_bytes; - - nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ - nh->nlmsg_seq = sn; - send_bytes = sendmsg(nlsk_fd, &msg, 0); - if (send_bytes < 0) { - rte_errno = errno; - return -rte_errno; - } - return send_bytes; -} - -/** - * Send a message to the kernel on the Netlink socket. - * - * @param[in] nlsk_fd - * The Netlink socket file descriptor used for communication. - * @param[in] nh - * The Netlink message send to the kernel. - * @param[in] sn - * Sequence number. - * - * @return - * The number of sent bytes on success, a negative errno value otherwise and - * rte_errno is set. - */ -static int -mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn) -{ - struct sockaddr_nl sa = { - .nl_family = AF_NETLINK, - }; - struct iovec iov = { - .iov_base = nh, - .iov_len = nh->nlmsg_len, - }; - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - int send_bytes; - - nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ - nh->nlmsg_seq = sn; - send_bytes = sendmsg(nlsk_fd, &msg, 0); - if (send_bytes < 0) { - rte_errno = errno; - return -rte_errno; - } - return send_bytes; -} - -/** - * Receive a message from the kernel on the Netlink socket, following - * mlx5_nl_send(). 
- * - * @param[in] nlsk_fd - * The Netlink socket file descriptor used for communication. - * @param[in] sn - * Sequence number. - * @param[in] cb - * The callback function to call for each Netlink message received. - * @param[in, out] arg - * Custom arguments for the callback. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -static int -mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), - void *arg) -{ - struct sockaddr_nl sa; - char buf[MLX5_RECV_BUF_SIZE]; - struct iovec iov = { - .iov_base = buf, - .iov_len = sizeof(buf), - }; - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - /* One message at a time */ - .msg_iovlen = 1, - }; - int multipart = 0; - int ret = 0; - - do { - struct nlmsghdr *nh; - int recv_bytes = 0; - - do { - recv_bytes = recvmsg(nlsk_fd, &msg, 0); - if (recv_bytes == -1) { - rte_errno = errno; - return -rte_errno; - } - nh = (struct nlmsghdr *)buf; - } while (nh->nlmsg_seq != sn); - for (; - NLMSG_OK(nh, (unsigned int)recv_bytes); - nh = NLMSG_NEXT(nh, recv_bytes)) { - if (nh->nlmsg_type == NLMSG_ERROR) { - struct nlmsgerr *err_data = NLMSG_DATA(nh); - - if (err_data->error < 0) { - rte_errno = -err_data->error; - return -rte_errno; - } - /* Ack message. */ - return 0; - } - /* Multi-part msgs and their trailing DONE message. */ - if (nh->nlmsg_flags & NLM_F_MULTI) { - if (nh->nlmsg_type == NLMSG_DONE) - return 0; - multipart = 1; - } - if (cb) { - ret = cb(nh, arg); - if (ret < 0) - return ret; - } - } - } while (multipart); - return ret; -} - -/** - * Parse Netlink message to retrieve the bridge MAC address. - * - * @param nh - * Pointer to Netlink Message Header. - * @param arg - * PMD data register with this callback. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -static int -mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg) -{ - struct mlx5_nl_mac_addr *data = arg; - struct ndmsg *r = NLMSG_DATA(nh); - struct rtattr *attribute; - int len; - - len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); - for (attribute = MLX5_NDA_RTA(r); - RTA_OK(attribute, len); - attribute = RTA_NEXT(attribute, len)) { - if (attribute->rta_type == NDA_LLADDR) { - if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) { - DRV_LOG(WARNING, - "not enough room to finalize the" - " request"); - rte_errno = ENOMEM; - return -rte_errno; - } -#ifndef NDEBUG - char m[18]; - - rte_ether_format_addr(m, 18, RTA_DATA(attribute)); - DRV_LOG(DEBUG, "bridge MAC address %s", m); -#endif - memcpy(&(*data->mac)[data->mac_n++], - RTA_DATA(attribute), RTE_ETHER_ADDR_LEN); - } - } - return 0; -} - -/** - * Get bridge MAC addresses. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param mac[out] - * Pointer to the array table of MAC addresses to fill. - * Its size should be of MLX5_MAX_MAC_ADDRESSES. - * @param mac_n[out] - * Number of entries filled in MAC array. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -static int -mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr (*mac)[], int *mac_n) -{ - struct { - struct nlmsghdr hdr; - struct ifinfomsg ifm; - } req = { - .hdr = { - .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlmsg_type = RTM_GETNEIGH, - .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - }, - .ifm = { - .ifi_family = PF_BRIDGE, - .ifi_index = iface_idx, - }, - }; - struct mlx5_nl_mac_addr data = { - .mac = mac, - .mac_n = 0, - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - if (nlsk_fd == -1) - return 0; - ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm, - sizeof(struct ifinfomsg)); - if (ret < 0) - goto error; - ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data); - if (ret < 0) - goto error; - *mac_n = data.mac_n; - return 0; -error: - DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s", - iface_idx, strerror(rte_errno)); - return -rte_errno; -} - -/** - * Modify the MAC address neighbour table with Netlink. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param mac - * MAC address to consider. - * @param add - * 1 to add the MAC address, 0 to remove the MAC address. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -static int -mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac, int add) -{ - struct { - struct nlmsghdr hdr; - struct ndmsg ndm; - struct rtattr rta; - uint8_t buffer[RTE_ETHER_ADDR_LEN]; - } req = { - .hdr = { - .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), - .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | - NLM_F_EXCL | NLM_F_ACK, - .nlmsg_type = add ? 
RTM_NEWNEIGH : RTM_DELNEIGH, - }, - .ndm = { - .ndm_family = PF_BRIDGE, - .ndm_state = NUD_NOARP | NUD_PERMANENT, - .ndm_ifindex = iface_idx, - .ndm_flags = NTF_SELF, - }, - .rta = { - .rta_type = NDA_LLADDR, - .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN), - }, - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - if (nlsk_fd == -1) - return 0; - memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN); - req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + - RTA_ALIGN(req.rta.rta_len); - ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); - if (ret < 0) - goto error; - ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); - if (ret < 0) - goto error; - return 0; -error: - DRV_LOG(DEBUG, - "Interface %u cannot %s MAC address" - " %02X:%02X:%02X:%02X:%02X:%02X %s", - iface_idx, - add ? "add" : "remove", - mac->addr_bytes[0], mac->addr_bytes[1], - mac->addr_bytes[2], mac->addr_bytes[3], - mac->addr_bytes[4], mac->addr_bytes[5], - strerror(rte_errno)); - return -rte_errno; -} - -/** - * Modify the VF MAC address neighbour table with Netlink. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param mac - * MAC address to consider. - * @param vf_index - * VF index. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -int -mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac, int vf_index) -{ - int ret; - struct { - struct nlmsghdr hdr; - struct ifinfomsg ifm; - struct rtattr vf_list_rta; - struct rtattr vf_info_rta; - struct rtattr vf_mac_rta; - struct ifla_vf_mac ivm; - } req = { - .hdr = { - .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, - .nlmsg_type = RTM_BASE, - }, - .ifm = { - .ifi_index = iface_idx, - }, - .vf_list_rta = { - .rta_type = IFLA_VFINFO_LIST, - .rta_len = RTA_ALIGN(RTA_LENGTH(0)), - }, - .vf_info_rta = { - .rta_type = IFLA_VF_INFO, - .rta_len = RTA_ALIGN(RTA_LENGTH(0)), - }, - .vf_mac_rta = { - .rta_type = IFLA_VF_MAC, - }, - }; - struct ifla_vf_mac ivm = { - .vf = vf_index, - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - - memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN); - memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm)); - - req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm)); - req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + - RTA_ALIGN(req.vf_list_rta.rta_len) + - RTA_ALIGN(req.vf_info_rta.rta_len) + - RTA_ALIGN(req.vf_mac_rta.rta_len); - req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), - &req.vf_list_rta); - req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr), - &req.vf_info_rta); - - if (nlsk_fd < 0) - return -1; - ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); - if (ret < 0) - goto error; - ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL); - if (ret < 0) - goto error; - return 0; -error: - DRV_LOG(ERR, - "representor %u cannot set VF MAC address " - "%02X:%02X:%02X:%02X:%02X:%02X : %s", - vf_index, - mac->addr_bytes[0], mac->addr_bytes[1], - mac->addr_bytes[2], mac->addr_bytes[3], - mac->addr_bytes[4], mac->addr_bytes[5], - strerror(rte_errno)); - return -rte_errno; -} - -/** - * Add a MAC address. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. 
- * @param mac_own - * BITFIELD_DECLARE array to store the mac. - * @param mac - * MAC address to register. - * @param index - * MAC address index. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, - uint64_t *mac_own, struct rte_ether_addr *mac, - uint32_t index) -{ - int ret; - - ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1); - if (!ret) - BITFIELD_SET(mac_own, index); - if (ret == -EEXIST) - return 0; - return ret; -} - -/** - * Remove a MAC address. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param mac_own - * BITFIELD_DECLARE array to store the mac. - * @param mac - * MAC address to remove. - * @param index - * MAC address index. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, - struct rte_ether_addr *mac, uint32_t index) -{ - BITFIELD_RESET(mac_own, index); - return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0); -} - -/** - * Synchronize Netlink bridge table to the internal table. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param mac_addrs - * Mac addresses array to sync. - * @param n - * @p mac_addrs array size. - */ -void -mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac_addrs, int n) -{ - struct rte_ether_addr macs[n]; - int macs_n = 0; - int i; - int ret; - - ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); - if (ret) - return; - for (i = 0; i != macs_n; ++i) { - int j; - - /* Verify the address is not in the array yet. */ - for (j = 0; j != n; ++j) - if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j])) - break; - if (j != n) - continue; - /* Find the first entry available. 
*/ - for (j = 0; j != n; ++j) { - if (rte_is_zero_ether_addr(&mac_addrs[j])) { - mac_addrs[j] = macs[i]; - break; - } - } - } -} - -/** - * Flush all added MAC addresses. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param[in] mac_addrs - * Mac addresses array to flush. - * @param n - * @p mac_addrs array size. - * @param mac_own - * BITFIELD_DECLARE array to store the mac. - */ -void -mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac_addrs, int n, - uint64_t *mac_own) -{ - int i; - - for (i = n - 1; i >= 0; --i) { - struct rte_ether_addr *m = &mac_addrs[i]; - - if (BITFIELD_ISSET(mac_own, i)) - mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m, - i); - } -} - -/** - * Enable promiscuous / all multicast mode through Netlink. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param flags - * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti. - * @param enable - * Nonzero to enable, disable otherwise. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -static int -mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags, - int enable) -{ - struct { - struct nlmsghdr hdr; - struct ifinfomsg ifi; - } req = { - .hdr = { - .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlmsg_type = RTM_NEWLINK, - .nlmsg_flags = NLM_F_REQUEST, - }, - .ifi = { - .ifi_flags = enable ? flags : 0, - .ifi_change = flags, - .ifi_index = iface_idx, - }, - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - assert(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI))); - if (nlsk_fd < 0) - return 0; - ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn); - if (ret < 0) - return ret; - return 0; -} - -/** - * Enable promiscuous mode through Netlink. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. 
- * @param[in] iface_idx - * Net device interface index. - * @param enable - * Nonzero to enable, disable otherwise. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable) -{ - int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable); - - if (ret) - DRV_LOG(DEBUG, - "Interface %u cannot %s promisc mode: Netlink error %s", - iface_idx, enable ? "enable" : "disable", - strerror(rte_errno)); - return ret; -} - -/** - * Enable all multicast mode through Netlink. - * - * @param[in] nlsk_fd - * Netlink socket file descriptor. - * @param[in] iface_idx - * Net device interface index. - * @param enable - * Nonzero to enable, disable otherwise. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. - */ -int -mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable) -{ - int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI, - enable); - - if (ret) - DRV_LOG(DEBUG, - "Interface %u cannot %s allmulti : Netlink error %s", - iface_idx, enable ? "enable" : "disable", - strerror(rte_errno)); - return ret; -} - -/** - * Process network interface information from Netlink message. - * - * @param nh - * Pointer to Netlink message header. - * @param arg - * Opaque data pointer for this callback. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -static int -mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg) -{ - struct mlx5_nl_ifindex_data *data = arg; - struct mlx5_nl_ifindex_data local = { - .flags = 0, - }; - size_t off = NLMSG_HDRLEN; - - if (nh->nlmsg_type != - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && - nh->nlmsg_type != - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET)) - goto error; - while (off < nh->nlmsg_len) { - struct nlattr *na = (void *)((uintptr_t)nh + off); - void *payload = (void *)((uintptr_t)na + NLA_HDRLEN); - - if (na->nla_len > nh->nlmsg_len - off) - goto error; - switch (na->nla_type) { - case RDMA_NLDEV_ATTR_DEV_INDEX: - local.ibindex = *(uint32_t *)payload; - local.flags |= MLX5_NL_CMD_GET_IB_INDEX; - break; - case RDMA_NLDEV_ATTR_DEV_NAME: - if (!strcmp(payload, data->name)) - local.flags |= MLX5_NL_CMD_GET_IB_NAME; - break; - case RDMA_NLDEV_ATTR_NDEV_INDEX: - local.ifindex = *(uint32_t *)payload; - local.flags |= MLX5_NL_CMD_GET_NET_INDEX; - break; - case RDMA_NLDEV_ATTR_PORT_INDEX: - local.portnum = *(uint32_t *)payload; - local.flags |= MLX5_NL_CMD_GET_PORT_INDEX; - break; - default: - break; - } - off += NLA_ALIGN(na->nla_len); - } - /* - * It is possible to have multiple messages for all - * Infiniband devices in the system with appropriate name. - * So we should gather parameters locally and copy to - * query context only in case of coinciding device name. - */ - if (local.flags & MLX5_NL_CMD_GET_IB_NAME) { - data->flags = local.flags; - data->ibindex = local.ibindex; - data->ifindex = local.ifindex; - data->portnum = local.portnum; - } - return 0; -error: - rte_errno = EINVAL; - return -rte_errno; -} - -/** - * Get index of network interface associated with some IB device. - * - * This is the only somewhat safe method to avoid resorting to heuristics - * when faced with port representors. Unfortunately it requires at least - * Linux 4.17. - * - * @param nl - * Netlink socket of the RDMA kind (NETLINK_RDMA). 
- * @param[in] name - * IB device name. - * @param[in] pindex - * IB device port index, starting from 1 - * @return - * A valid (nonzero) interface index on success, 0 otherwise and rte_errno - * is set. - */ -unsigned int -mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex) -{ - struct mlx5_nl_ifindex_data data = { - .name = name, - .flags = 0, - .ibindex = 0, /* Determined during first pass. */ - .ifindex = 0, /* Determined during second pass. */ - }; - union { - struct nlmsghdr nh; - uint8_t buf[NLMSG_HDRLEN + - NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) + - NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))]; - } req = { - .nh = { - .nlmsg_len = NLMSG_LENGTH(0), - .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, - RDMA_NLDEV_CMD_GET), - .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, - }, - }; - struct nlattr *na; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - ret = mlx5_nl_send(nl, &req.nh, sn); - if (ret < 0) - return 0; - ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); - if (ret < 0) - return 0; - if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || - !(data.flags & MLX5_NL_CMD_GET_IB_INDEX)) - goto error; - data.flags = 0; - sn = MLX5_NL_SN_GENERATE; - req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, - RDMA_NLDEV_CMD_PORT_GET); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN); - na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN); - na->nla_len = NLA_HDRLEN + sizeof(data.ibindex); - na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX; - memcpy((void *)((uintptr_t)na + NLA_HDRLEN), - &data.ibindex, sizeof(data.ibindex)); - na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len)); - na->nla_len = NLA_HDRLEN + sizeof(pindex); - na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX; - memcpy((void *)((uintptr_t)na + NLA_HDRLEN), - &pindex, sizeof(pindex)); - ret = mlx5_nl_send(nl, &req.nh, sn); - if (ret < 0) - return 0; - ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); - if (ret < 0) - return 0; - if 
(!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || - !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || - !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) || - !data.ifindex) - goto error; - return data.ifindex; -error: - rte_errno = ENODEV; - return 0; -} - -/** - * Get the number of physical ports of given IB device. - * - * @param nl - * Netlink socket of the RDMA kind (NETLINK_RDMA). - * @param[in] name - * IB device name. - * - * @return - * A valid (nonzero) number of ports on success, 0 otherwise - * and rte_errno is set. - */ -unsigned int -mlx5_nl_portnum(int nl, const char *name) -{ - struct mlx5_nl_ifindex_data data = { - .flags = 0, - .name = name, - .ifindex = 0, - .portnum = 0, - }; - struct nlmsghdr req = { - .nlmsg_len = NLMSG_LENGTH(0), - .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, - RDMA_NLDEV_CMD_GET), - .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - ret = mlx5_nl_send(nl, &req, sn); - if (ret < 0) - return 0; - ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data); - if (ret < 0) - return 0; - if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) || - !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) || - !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) { - rte_errno = ENODEV; - return 0; - } - if (!data.portnum) - rte_errno = EINVAL; - return data.portnum; -} - -/** - * Analyze gathered port parameters via Netlink to recognize master - * and representor devices for E-Switch configuration. - * - * @param[in] num_vf_set - * flag of presence of number of VFs port attribute. - * @param[inout] switch_info - * Port information, including port name as a number and port name - * type if recognized - * - * @return - * master and representor flags are set in switch_info according to - * recognized parameters (if any). 
- */ -static void -mlx5_nl_check_switch_info(bool num_vf_set, - struct mlx5_switch_info *switch_info) -{ - switch (switch_info->name_type) { - case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: - /* - * Name is not recognized, assume the master, - * check the number of VFs key presence. - */ - switch_info->master = num_vf_set; - break; - case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: - /* - * Name is not set, this assumes the legacy naming - * schema for master, just check if there is a - * number of VFs key. - */ - switch_info->master = num_vf_set; - break; - case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: - /* New uplink naming schema recognized. */ - switch_info->master = 1; - break; - case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: - /* Legacy representors naming schema. */ - switch_info->representor = !num_vf_set; - break; - case MLX5_PHYS_PORT_NAME_TYPE_PFVF: - /* New representors naming schema. */ - switch_info->representor = 1; - break; - } -} - -/** - * Process switch information from Netlink message. - * - * @param nh - * Pointer to Netlink message header. - * @param arg - * Opaque data pointer for this callback. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -static int -mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) -{ - struct mlx5_switch_info info = { - .master = 0, - .representor = 0, - .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, - .port_name = 0, - .switch_id = 0, - }; - size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - bool switch_id_set = false; - bool num_vf_set = false; - - if (nh->nlmsg_type != RTM_NEWLINK) - goto error; - while (off < nh->nlmsg_len) { - struct rtattr *ra = (void *)((uintptr_t)nh + off); - void *payload = RTA_DATA(ra); - unsigned int i; - - if (ra->rta_len > nh->nlmsg_len - off) - goto error; - switch (ra->rta_type) { - case IFLA_NUM_VF: - num_vf_set = true; - break; - case IFLA_PHYS_PORT_NAME: - mlx5_translate_port_name((char *)payload, &info); - break; - case IFLA_PHYS_SWITCH_ID: - info.switch_id = 0; - for (i = 0; i < RTA_PAYLOAD(ra); ++i) { - info.switch_id <<= 8; - info.switch_id |= ((uint8_t *)payload)[i]; - } - switch_id_set = true; - break; - } - off += RTA_ALIGN(ra->rta_len); - } - if (switch_id_set) { - /* We have some E-Switch configuration. */ - mlx5_nl_check_switch_info(num_vf_set, &info); - } - assert(!(info.master && info.representor)); - memcpy(arg, &info, sizeof(info)); - return 0; -error: - rte_errno = EINVAL; - return -rte_errno; -} - -/** - * Get switch information associated with network interface. - * - * @param nl - * Netlink socket of the ROUTE kind (NETLINK_ROUTE). - * @param ifindex - * Network interface index. - * @param[out] info - * Switch information object, populated in case of success. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -int -mlx5_nl_switch_info(int nl, unsigned int ifindex, - struct mlx5_switch_info *info) -{ - struct { - struct nlmsghdr nh; - struct ifinfomsg info; - struct rtattr rta; - uint32_t extmask; - } req = { - .nh = { - .nlmsg_len = NLMSG_LENGTH - (sizeof(req.info) + - RTA_LENGTH(sizeof(uint32_t))), - .nlmsg_type = RTM_GETLINK, - .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, - }, - .info = { - .ifi_family = AF_UNSPEC, - .ifi_index = ifindex, - }, - .rta = { - .rta_type = IFLA_EXT_MASK, - .rta_len = RTA_LENGTH(sizeof(int32_t)), - }, - .extmask = RTE_LE32(1), - }; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - ret = mlx5_nl_send(nl, &req.nh, sn); - if (ret >= 0) - ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); - if (info->master && info->representor) { - DRV_LOG(ERR, "ifindex %u device is recognized as master" - " and as representor", ifindex); - rte_errno = ENODEV; - ret = -rte_errno; - } - return ret; -} - -/* - * Delete VLAN network device by ifindex. - * - * @param[in] tcf - * Context object initialized by mlx5_nl_vlan_vmwa_init(). - * @param[in] ifindex - * Interface index of network device to delete. - */ -void -mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, - uint32_t ifindex) -{ - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - struct { - struct nlmsghdr nh; - struct ifinfomsg info; - } req = { - .nh = { - .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlmsg_type = RTM_DELLINK, - .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, - }, - .info = { - .ifi_family = AF_UNSPEC, - .ifi_index = ifindex, - }, - }; - - if (ifindex) { - ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn); - if (ret >= 0) - ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); - if (ret < 0) - DRV_LOG(WARNING, "netlink: error deleting VLAN WA" - " ifindex %u, %d", ifindex, ret); - } -} - -/* Set of subroutines to build Netlink message. 
*/ -static struct nlattr * -nl_msg_tail(struct nlmsghdr *nlh) -{ - return (struct nlattr *) - (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); -} - -static void -nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen) -{ - struct nlattr *nla = nl_msg_tail(nlh); - - nla->nla_type = type; - nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr) + alen); - nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + nla->nla_len; - - if (alen) - memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen); -} - -static struct nlattr * -nl_attr_nest_start(struct nlmsghdr *nlh, int type) -{ - struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh); - - nl_attr_put(nlh, type, NULL, 0); - return nest; -} - -static void -nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest) -{ - nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest; -} - -/* - * Create network VLAN device with specified VLAN tag. - * - * @param[in] tcf - * Context object initialized by mlx5_nl_vlan_vmwa_init(). - * @param[in] ifindex - * Base network interface index. - * @param[in] tag - * VLAN tag for VLAN network device to create. 
- */ -uint32_t -mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, - uint32_t ifindex, uint16_t tag) -{ - struct nlmsghdr *nlh; - struct ifinfomsg *ifm; - char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32]; - - alignas(RTE_CACHE_LINE_SIZE) - uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + - NLMSG_ALIGN(sizeof(struct ifinfomsg)) + - NLMSG_ALIGN(sizeof(struct nlattr)) * 8 + - NLMSG_ALIGN(sizeof(uint32_t)) + - NLMSG_ALIGN(sizeof(name)) + - NLMSG_ALIGN(sizeof("vlan")) + - NLMSG_ALIGN(sizeof(uint32_t)) + - NLMSG_ALIGN(sizeof(uint16_t)) + 16]; - struct nlattr *na_info; - struct nlattr *na_vlan; - uint32_t sn = MLX5_NL_SN_GENERATE; - int ret; - - memset(buf, 0, sizeof(buf)); - nlh = (struct nlmsghdr *)buf; - nlh->nlmsg_len = sizeof(struct nlmsghdr); - nlh->nlmsg_type = RTM_NEWLINK; - nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | - NLM_F_EXCL | NLM_F_ACK; - ifm = (struct ifinfomsg *)nl_msg_tail(nlh); - nlh->nlmsg_len += sizeof(struct ifinfomsg); - ifm->ifi_family = AF_UNSPEC; - ifm->ifi_type = 0; - ifm->ifi_index = 0; - ifm->ifi_flags = IFF_UP; - ifm->ifi_change = 0xffffffff; - nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex)); - ret = snprintf(name, sizeof(name), "%s.%u.%u", - MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag); - nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1); - na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO); - nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan")); - na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA); - nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag)); - nl_attr_nest_end(nlh, na_vlan); - nl_attr_nest_end(nlh, na_info); - assert(sizeof(buf) >= nlh->nlmsg_len); - ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn); - if (ret >= 0) - ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL); - if (ret < 0) { - DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name, - ret); - } - // Try to get ifindex of created or pre-existing device. 
- ret = if_nametoindex(name); - if (!ret) { - DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name, - errno); - return 0; - } - return ret; -} diff --git a/drivers/net/mlx5/mlx5_nl.h b/drivers/net/mlx5/mlx5_nl.h deleted file mode 100644 index 9be87c016e..0000000000 --- a/drivers/net/mlx5/mlx5_nl.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright 2019 Mellanox Technologies, Ltd - */ - -#ifndef RTE_PMD_MLX5_NL_H_ -#define RTE_PMD_MLX5_NL_H_ - -#include - - -/* Recognized Infiniband device physical port name types. */ -enum mlx5_nl_phys_port_name_type { - MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ - MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */ - MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ - MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ - MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ -}; - -/** Switch information returned by mlx5_nl_switch_info(). */ -struct mlx5_switch_info { - uint32_t master:1; /**< Master device. */ - uint32_t representor:1; /**< Representor device. */ - enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ - int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ - int32_t port_name; /**< Representor port name. */ - uint64_t switch_id; /**< Switch identifier. */ -}; - -/* VLAN netdev for VLAN workaround. */ -struct mlx5_nl_vlan_dev { - uint32_t refcnt; - uint32_t ifindex; /**< Own interface index. */ -}; - -/* - * Array of VLAN devices created on the base of VF - * used for workaround in virtual environments. 
- */ -struct mlx5_nl_vlan_vmwa_context { - int nl_socket; - uint32_t vf_ifindex; - struct mlx5_nl_vlan_dev vlan_dev[4096]; -}; - - -int mlx5_nl_init(int protocol); -int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, - struct rte_ether_addr *mac, uint32_t index); -int mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, - uint64_t *mac_own, struct rte_ether_addr *mac, - uint32_t index); -void mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac_addrs, int n); -void mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac_addrs, int n, - uint64_t *mac_own); -int mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable); -int mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable); -unsigned int mlx5_nl_portnum(int nl, const char *name); -unsigned int mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex); -int mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx, - struct rte_ether_addr *mac, int vf_index); -int mlx5_nl_switch_info(int nl, unsigned int ifindex, - struct mlx5_switch_info *info); - -void mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa, - uint32_t ifindex); -uint32_t mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa, - uint32_t ifindex, uint16_t tag); - -#endif /* RTE_PMD_MLX5_NL_H_ */ diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c index fc1a91c303..8e63b674ee 100644 --- a/drivers/net/mlx5/mlx5_vlan.c +++ b/drivers/net/mlx5/mlx5_vlan.c @@ -33,11 +33,11 @@ #include #include +#include #include "mlx5.h" #include "mlx5_autoconf.h" #include "mlx5_rxtx.h" -#include "mlx5_nl.h" #include "mlx5_utils.h" /**