1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
11 #include <rdma/rdma_netlink.h>
17 #include <sys/socket.h>
20 #include <rte_errno.h>
23 #include "../mlx5_common_log.h"
24 #include "mlx5_malloc.h"
26 #include <linux/devlink.h>
30 /* Size of the buffer to receive kernel messages */
31 #define MLX5_NL_BUF_SIZE (32 * 1024)
32 /* Send buffer size for the Netlink socket */
33 #define MLX5_SEND_BUF_SIZE 32768
34 /* Receive buffer size for the Netlink socket */
35 #define MLX5_RECV_BUF_SIZE 32768
36 /* Maximal physical port name length. */
37 #define MLX5_PHYS_PORT_NAME_MAX 128
39 /** Parameters of VLAN devices created by driver. */
40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
42 * Define NDA_RTA as defined in iproute2 sources.
44 * see in iproute2 sources file include/libnetlink.h
47 #define MLX5_NDA_RTA(r) \
48 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
51 * Define NLMSG_TAIL as defined in iproute2 sources.
53 * see in iproute2 sources file include/libnetlink.h
56 #define NLMSG_TAIL(nmsg) \
57 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
60 * The following definitions are normally found in rdma/rdma_netlink.h,
61 * however they are so recent that most systems do not expose them yet.
63 #ifndef HAVE_RDMA_NL_NLDEV
64 #define RDMA_NL_NLDEV 5
66 #ifndef HAVE_RDMA_NLDEV_CMD_GET
67 #define RDMA_NLDEV_CMD_GET 1
69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
70 #define RDMA_NLDEV_CMD_PORT_GET 5
72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1
75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
76 #define RDMA_NLDEV_ATTR_DEV_NAME 2
78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
81 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE
82 #define RDMA_NLDEV_ATTR_PORT_STATE 12
84 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
85 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
88 /* These are normally found in linux/if_link.h. */
89 #ifndef HAVE_IFLA_NUM_VF
90 #define IFLA_NUM_VF 21
92 #ifndef HAVE_IFLA_EXT_MASK
93 #define IFLA_EXT_MASK 29
95 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
96 #define IFLA_PHYS_SWITCH_ID 36
98 #ifndef HAVE_IFLA_PHYS_PORT_NAME
99 #define IFLA_PHYS_PORT_NAME 38
103 * Some Devlink defines may be missing in old kernel versions,
104 * adjust used defines.
106 #ifndef DEVLINK_GENL_NAME
107 #define DEVLINK_GENL_NAME "devlink"
109 #ifndef DEVLINK_GENL_VERSION
110 #define DEVLINK_GENL_VERSION 1
112 #ifndef DEVLINK_ATTR_BUS_NAME
113 #define DEVLINK_ATTR_BUS_NAME 1
115 #ifndef DEVLINK_ATTR_DEV_NAME
116 #define DEVLINK_ATTR_DEV_NAME 2
118 #ifndef DEVLINK_ATTR_PARAM
119 #define DEVLINK_ATTR_PARAM 80
121 #ifndef DEVLINK_ATTR_PARAM_NAME
122 #define DEVLINK_ATTR_PARAM_NAME 81
124 #ifndef DEVLINK_ATTR_PARAM_TYPE
125 #define DEVLINK_ATTR_PARAM_TYPE 83
127 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
128 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
130 #ifndef DEVLINK_ATTR_PARAM_VALUE
131 #define DEVLINK_ATTR_PARAM_VALUE 85
133 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
134 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
136 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
137 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
139 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
140 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
142 #ifndef DEVLINK_CMD_RELOAD
143 #define DEVLINK_CMD_RELOAD 37
145 #ifndef DEVLINK_CMD_PARAM_GET
146 #define DEVLINK_CMD_PARAM_GET 38
148 #ifndef DEVLINK_CMD_PARAM_SET
149 #define DEVLINK_CMD_PARAM_SET 39
155 /* Add/remove MAC address through Netlink */
/*
 * Accumulator passed as the opaque argument to mlx5_nl_mac_addr_cb():
 * mac points at the destination array and mac_n is the index of the next
 * free slot, incremented for every address the callback stores.
 */
156 struct mlx5_nl_mac_addr {
157 struct rte_ether_addr (*mac)[];
158 /**< MAC address handled by the device. */
159 int mac_n; /**< Number of addresses in the array. */
162 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
163 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
164 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
165 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
166 #define MLX5_NL_CMD_GET_PORT_STATE (1 << 4)
168 /** Data structure used by mlx5_nl_cmdget_cb(). */
/*
 * Query context shared with mlx5_nl_cmdget_cb(). The callback compares the
 * received device name against name and, on a match, fills the out fields;
 * flags is a bitwise OR of the MLX5_NL_CMD_GET_* bits telling which
 * attributes were actually present in the reply.
 */
169 struct mlx5_nl_port_info {
170 const char *name; /**< IB device name (in). */
171 uint32_t flags; /**< found attribute flags (out). */
172 uint32_t ibindex; /**< IB device index (out). */
173 uint32_t ifindex; /**< Network interface index (out). */
174 uint32_t portnum; /**< IB device max port number (out). */
175 uint16_t state; /**< IB device port state (out). */
180 /* Generate Netlink sequence number. */
181 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED)
184 * Opens a Netlink socket.
187 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
190 * A file descriptor on success, a negative errno value otherwise and
/*
 * Create a raw AF_NETLINK socket for the requested protocol, inspect its
 * send and receive buffer sizes and bind it to a local Netlink address.
 */
194 mlx5_nl_init(int protocol)
199 struct sockaddr_nl local = {
200 .nl_family = AF_NETLINK,
/* SOCK_CLOEXEC keeps the descriptor from leaking across exec(). */
204 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
/* Read back the current send buffer size into buf_size. */
209 opt_size = sizeof(buf_size);
210 ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size);
215 DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size);
/*
 * NOTE(review): setsockopt() below is given the same buf_size value that
 * getsockopt() just returned, so it does not appear to raise the buffer
 * to MLX5_SEND_BUF_SIZE - confirm whether the constant was intended.
 */
216 if (buf_size < MLX5_SEND_BUF_SIZE) {
217 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
218 &buf_size, sizeof(buf_size));
/* Same sequence for the receive buffer. */
224 opt_size = sizeof(buf_size);
225 ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size);
230 DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size);
/*
 * NOTE(review): as above, the value written back is the one just read,
 * not MLX5_RECV_BUF_SIZE - confirm.
 */
231 if (buf_size < MLX5_RECV_BUF_SIZE) {
232 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
233 &buf_size, sizeof(buf_size));
239 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
251 * Send a request message to the kernel on the Netlink socket.
254 * Netlink socket file descriptor.
256 * The Netlink message sent to the kernel.
260 * Pointer to the request structure.
262 * Length of the request in bytes.
265 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send a request to the kernel as two scatter/gather segments: the Netlink
 * header itself followed by a separately supplied payload.
 */
269 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
272 struct sockaddr_nl sa = {
273 .nl_family = AF_NETLINK,
275 struct iovec iov[2] = {
/* Segment 0 covers only the header, segment 1 the len payload bytes. */
276 { .iov_base = nh, .iov_len = sizeof(*nh), },
277 { .iov_base = req, .iov_len = len, },
279 struct msghdr msg = {
281 .msg_namelen = sizeof(sa),
287 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
289 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result selects the failure path. */
290 if (send_bytes < 0) {
298 * Send a message to the kernel on the Netlink socket.
301 * The Netlink socket file descriptor used for communication.
303 * The Netlink message sent to the kernel.
308 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send an already assembled Netlink message to the kernel. The transfer
 * length is taken from the header, so nh->nlmsg_len must already cover the
 * header plus every attribute appended by the caller.
 */
312 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
314 struct sockaddr_nl sa = {
315 .nl_family = AF_NETLINK,
319 .iov_len = nh->nlmsg_len,
321 struct msghdr msg = {
323 .msg_namelen = sizeof(sa),
329 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
331 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result selects the failure path. */
332 if (send_bytes < 0) {
340 * Receive a message from the kernel on the Netlink socket, following
344 * The Netlink socket file descriptor used for communication.
348 * The callback function to call for each Netlink message received.
349 * @param[in, out] arg
350 * Custom arguments for the callback.
353 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Receive the kernel reply for sequence number sn and walk every Netlink
 * message it contains. Each datagram is first peeked to learn its size,
 * then fetched into a freshly allocated buffer of at least
 * MLX5_RECV_BUF_SIZE bytes.
 */
356 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
359 struct sockaddr_nl sa;
361 struct msghdr msg = {
363 .msg_namelen = sizeof(sa),
365 /* One message at a time */
377 /* Query length of incoming message. */
/* MSG_PEEK leaves the datagram queued; MSG_TRUNC reports its real size. */
380 recv_bytes = recvmsg(nlsk_fd, &msg,
381 MSG_PEEK | MSG_TRUNC);
382 if (recv_bytes < 0) {
/* A zero length result is handled separately from a negative one. */
387 if (recv_bytes == 0) {
392 /* Allocate buffer to fetch the message. */
/* Never allocate less than MLX5_RECV_BUF_SIZE bytes. */
393 if (recv_bytes < MLX5_RECV_BUF_SIZE)
394 recv_bytes = MLX5_RECV_BUF_SIZE;
396 buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY);
402 /* Fetch the message. */
404 iov.iov_len = recv_bytes;
405 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
406 if (recv_bytes == -1) {
411 nh = (struct nlmsghdr *)buf;
/* Keep fetching until the first header carries the expected sequence. */
412 } while (nh->nlmsg_seq != sn);
/* Iterate over all messages packed into the received buffer. */
414 NLMSG_OK(nh, (unsigned int)recv_bytes);
415 nh = NLMSG_NEXT(nh, recv_bytes)) {
416 if (nh->nlmsg_type == NLMSG_ERROR) {
417 struct nlmsgerr *err_data = NLMSG_DATA(nh);
/* The kernel reports a negative errno; rte_errno stores it positive. */
419 if (err_data->error < 0) {
420 rte_errno = -err_data->error;
428 /* Multi-part msgs and their trailing DONE message. */
429 if (nh->nlmsg_flags & NLM_F_MULTI) {
430 if (nh->nlmsg_type == NLMSG_DONE) {
449 * Parse Netlink message to retrieve the bridge MAC address.
452 * Pointer to Netlink Message Header.
454 * PMD data registered with this callback.
457 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback: scan the attributes that follow the ndmsg header
 * and append every NDA_LLADDR payload to the array referenced by arg
 * (a struct mlx5_nl_mac_addr).
 */
460 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
462 struct mlx5_nl_mac_addr *data = arg;
463 struct ndmsg *r = NLMSG_DATA(nh);
464 struct rtattr *attribute;
/* Bytes left for attributes once the ndmsg header is accounted for. */
467 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
468 for (attribute = MLX5_NDA_RTA(r);
469 RTA_OK(attribute, len);
470 attribute = RTA_NEXT(attribute, len)) {
471 if (attribute->rta_type == NDA_LLADDR) {
/* The destination is treated as full at MLX5_MAX_MAC_ADDRESSES entries. */
472 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
474 "not enough room to finalize the"
479 #ifdef RTE_LIBRTE_MLX5_DEBUG
480 char m[RTE_ETHER_ADDR_FMT_SIZE];
482 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
483 RTA_DATA(attribute));
484 DRV_LOG(DEBUG, "bridge MAC address %s", m);
/* Store the address in the next free slot and advance the count. */
486 memcpy(&(*data->mac)[data->mac_n++],
487 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
494 * Get bridge MAC addresses.
497 * Netlink socket file descriptor.
498 * @param[in] iface_idx
499 * Net device interface index.
501 * Pointer to the array table of MAC addresses to fill.
502 * Its size should be MLX5_MAX_MAC_ADDRESSES.
504 * Number of entries filled in MAC array.
507 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Dump the bridge neighbour entries of interface iface_idx and collect the
 * link layer addresses found into the caller supplied array.
 */
510 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
511 struct rte_ether_addr (*mac)[], int *mac_n)
515 struct ifinfomsg ifm;
518 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
519 .nlmsg_type = RTM_GETNEIGH,
/* NLM_F_DUMP asks for the whole table rather than a single entry. */
520 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
523 .ifi_family = PF_BRIDGE,
524 .ifi_index = iface_idx,
527 struct mlx5_nl_mac_addr data = {
531 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Header and ifinfomsg payload are sent as two separate segments. */
536 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
537 sizeof(struct ifinfomsg));
/* Every reply message is handed to mlx5_nl_mac_addr_cb() with data. */
540 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
546 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
547 iface_idx, strerror(rte_errno));
552 * Modify the MAC address neighbour table with Netlink.
555 * Netlink socket file descriptor.
556 * @param[in] iface_idx
557 * Net device interface index.
559 * MAC address to consider.
561 * 1 to add the MAC address, 0 to remove the MAC address.
564 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Add or delete one bridge neighbour entry carrying the given MAC address
 * on interface iface_idx, then wait for the kernel acknowledgement.
 */
567 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
568 struct rte_ether_addr *mac, int add)
/* NOTE(review): buffer presumably provides the payload storage that
 * follows the rtattr header inside the request - confirm layout. */
574 uint8_t buffer[RTE_ETHER_ADDR_LEN];
577 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
578 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
579 NLM_F_EXCL | NLM_F_ACK,
/* The add argument selects between entry creation and deletion. */
580 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
583 .ndm_family = PF_BRIDGE,
584 .ndm_state = NUD_NOARP | NUD_PERMANENT,
585 .ndm_ifindex = iface_idx,
586 .ndm_flags = NTF_SELF,
589 .rta_type = NDA_LLADDR,
590 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
593 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Place the MAC bytes in the attribute payload area. */
598 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
/* Grow the message length to include the aligned attribute. */
599 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
600 RTA_ALIGN(req.rta.rta_len);
601 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
/* No callback is registered for the reply. */
604 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
609 #ifdef RTE_LIBRTE_MLX5_DEBUG
611 char m[RTE_ETHER_ADDR_FMT_SIZE];
613 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
615 "Interface %u cannot %s MAC address %s %s",
617 add ? "add" : "remove", m, strerror(rte_errno));
624 * Modify the VF MAC address neighbour table with Netlink.
627 * Netlink socket file descriptor.
628 * @param[in] iface_idx
629 * Net device interface index.
631 * MAC address to consider.
636 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Set the MAC address of a VF through a link request that nests three
 * attributes: IFLA_VFINFO_LIST containing IFLA_VF_INFO containing
 * IFLA_VF_MAC with a struct ifla_vf_mac payload.
 */
639 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
640 struct rte_ether_addr *mac, int vf_index)
645 struct ifinfomsg ifm;
646 struct rtattr vf_list_rta;
647 struct rtattr vf_info_rta;
648 struct rtattr vf_mac_rta;
649 struct ifla_vf_mac ivm;
652 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
653 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
/* NOTE(review): RTM_BASE presumably aliases RTM_NEWLINK - confirm. */
654 .nlmsg_type = RTM_BASE,
657 .ifi_index = iface_idx,
/* Both container attributes start out with an empty payload. */
660 .rta_type = IFLA_VFINFO_LIST,
661 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
664 .rta_type = IFLA_VF_INFO,
665 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
668 .rta_type = IFLA_VF_MAC,
671 struct ifla_vf_mac ivm = {
674 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Fill the local ivm, then copy it into the innermost attribute payload. */
676 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
677 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
679 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
/* Total length is the header plus the three aligned attributes. */
680 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
681 RTA_ALIGN(req.vf_list_rta.rta_len) +
682 RTA_ALIGN(req.vf_info_rta.rta_len) +
683 RTA_ALIGN(req.vf_mac_rta.rta_len);
/* Container lengths are recomputed relative to the final message tail. */
684 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
686 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
691 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
/* No callback is registered for the reply. */
694 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
700 "representor %u cannot set VF MAC address "
701 RTE_ETHER_ADDR_PRT_FMT " : %s",
703 RTE_ETHER_ADDR_BYTES(mac),
704 strerror(rte_errno));
712 * Netlink socket file descriptor.
713 * @param[in] iface_idx
714 * Net device interface index.
716 * BITFIELD_DECLARE array to store the mac.
718 * MAC address to register.
723 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Register a MAC address through Netlink and record ownership of slot
 * index in the mac_own bitfield.
 */
726 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
727 uint64_t *mac_own, struct rte_ether_addr *mac,
/* The trailing 1 selects the add operation. */
732 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
/* The range is asserted in debug builds and also tested at run time. */
734 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
735 if (index >= MLX5_MAX_MAC_ADDRESSES)
738 BITFIELD_SET(mac_own, index);
746 * Remove a MAC address.
749 * Netlink socket file descriptor.
750 * @param[in] iface_idx
751 * Net device interface index.
753 * BITFIELD_DECLARE array to store the mac.
755 * MAC address to remove.
760 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Drop ownership of slot index in mac_own and delete the MAC address
 * through Netlink; the result of the Netlink deletion is returned.
 */
763 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
764 struct rte_ether_addr *mac, uint32_t index)
/* The range is asserted in debug builds and also tested at run time. */
766 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
767 if (index >= MLX5_MAX_MAC_ADDRESSES)
/* The ownership bit is cleared before the deletion is attempted. */
770 BITFIELD_RESET(mac_own, index);
771 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
775 * Synchronize Netlink bridge table to the internal table.
778 * Netlink socket file descriptor.
779 * @param[in] iface_idx
780 * Net device interface index.
782 * Mac addresses array to sync.
784 * @p mac_addrs array size.
/*
 * Merge the addresses currently present in the bridge table into
 * mac_addrs. Addresses already present are left alone; new multicast
 * addresses go to free slots at or above MLX5_MAX_UC_MAC_ADDRESSES and the
 * remaining ones to free slots below it.
 */
787 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
788 struct rte_ether_addr *mac_addrs, int n)
/*
 * NOTE(review): macs holds n entries while mlx5_nl_mac_addr_cb() stops at
 * MLX5_MAX_MAC_ADDRESSES - presumably callers always pass that value for
 * n; confirm.
 */
790 struct rte_ether_addr macs[n];
795 memset(macs, 0, n * sizeof(macs[0]));
796 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
799 for (i = 0; i != macs_n; ++i) {
802 /* Verify the address is not in the array yet. */
803 for (j = 0; j != n; ++j)
804 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
808 if (rte_is_multicast_ether_addr(&macs[i])) {
809 /* Find the first entry available. */
/* Multicast search starts after the unicast slots. */
810 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
811 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
812 mac_addrs[j] = macs[i];
817 /* Find the first entry available. */
/* This search is limited to the unicast slots; an all zero entry
 * counts as free. */
818 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
819 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
820 mac_addrs[j] = macs[i];
829 * Flush all added MAC addresses.
832 * Netlink socket file descriptor.
833 * @param[in] iface_idx
834 * Net device interface index.
835 * @param[in] mac_addrs
836 * Mac addresses array to flush.
838 * @p mac_addrs array size.
840 * BITFIELD_DECLARE array to store the mac.
/*
 * Remove through Netlink every address of mac_addrs whose slot is marked
 * in the mac_own bitfield.
 */
843 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
844 struct rte_ether_addr *mac_addrs, int n,
/* Guard against an empty or oversized array before walking it. */
849 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
/* Slots are visited from the last one down to index 0. */
852 for (i = n - 1; i >= 0; --i) {
853 struct rte_ether_addr *m = &mac_addrs[i];
/* Only slots whose bit is set in mac_own are removed. */
855 if (BITFIELD_ISSET(mac_own, i))
856 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
862 * Enable promiscuous / all multicast mode through Netlink.
865 * Netlink socket file descriptor.
866 * @param[in] iface_idx
867 * Net device interface index.
869 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
871 * Nonzero to enable, disable otherwise.
874 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Build and send an RTM_NEWLINK request that turns the given interface
 * flags on or off for interface iface_idx.
 */
877 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
882 struct ifinfomsg ifi;
885 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
886 .nlmsg_type = RTM_NEWLINK,
/* The request carries NLM_F_REQUEST only, without NLM_F_ACK. */
887 .nlmsg_flags = NLM_F_REQUEST,
/* Requested bits when enabling, zero when disabling. */
890 .ifi_flags = enable ? flags : 0,
892 .ifi_index = iface_idx,
895 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Only IFF_PROMISC and IFF_ALLMULTI are expected in flags. */
898 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
901 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
908 * Enable promiscuous mode through Netlink.
911 * Netlink socket file descriptor.
912 * @param[in] iface_idx
913 * Net device interface index.
915 * Nonzero to enable, disable otherwise.
918 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Toggle promiscuous mode by delegating to mlx5_nl_device_flags() with
 * IFF_PROMISC; the message below is used for failure reporting.
 */
921 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
923 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
927 "Interface %u cannot %s promisc mode: Netlink error %s",
928 iface_idx, enable ? "enable" : "disable",
929 strerror(rte_errno));
934 * Enable all multicast mode through Netlink.
937 * Netlink socket file descriptor.
938 * @param[in] iface_idx
939 * Net device interface index.
941 * Nonzero to enable, disable otherwise.
944 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Toggle all multicast mode by delegating to mlx5_nl_device_flags() with
 * IFF_ALLMULTI; the message below is used for failure reporting.
 */
947 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
949 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
954 "Interface %u cannot %s allmulti : Netlink error %s",
955 iface_idx, enable ? "enable" : "disable",
956 strerror(rte_errno));
961 * Process network interface information from Netlink message.
964 * Pointer to Netlink message header.
966 * Opaque data pointer for this callback.
969 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback for RDMA NLDEV replies. Attributes are gathered
 * into a local scratch structure and published to the query context in
 * arg only when the device name attribute equals data->name.
 */
972 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
974 struct mlx5_nl_port_info *data = arg;
975 struct mlx5_nl_port_info local = {
/* Attributes start right after the Netlink header. */
978 size_t off = NLMSG_HDRLEN;
/* Only NLDEV GET and PORT_GET message types are expected here. */
980 if (nh->nlmsg_type !=
981 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
983 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
985 while (off < nh->nlmsg_len) {
986 struct nlattr *na = (void *)((uintptr_t)nh + off);
987 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
/* Reject an attribute that claims to extend past the message end. */
989 if (na->nla_len > nh->nlmsg_len - off)
/* Each recognised attribute stores its value and sets one flag bit. */
991 switch (na->nla_type) {
992 case RDMA_NLDEV_ATTR_DEV_INDEX:
993 local.ibindex = *(uint32_t *)payload;
994 local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
996 case RDMA_NLDEV_ATTR_DEV_NAME:
/* The name flag is set only on an exact match with the requested name. */
997 if (!strcmp(payload, data->name))
998 local.flags |= MLX5_NL_CMD_GET_IB_NAME;
1000 case RDMA_NLDEV_ATTR_NDEV_INDEX:
1001 local.ifindex = *(uint32_t *)payload;
1002 local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
1004 case RDMA_NLDEV_ATTR_PORT_INDEX:
1005 local.portnum = *(uint32_t *)payload;
1006 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
1008 case RDMA_NLDEV_ATTR_PORT_STATE:
/* Port state is read as a single byte. */
1009 local.state = *(uint8_t *)payload;
1010 local.flags |= MLX5_NL_CMD_GET_PORT_STATE;
/*
 * NOTE(review): an attribute with nla_len equal to zero would leave off
 * unchanged here - confirm the kernel never emits one or add a guard.
 */
1015 off += NLA_ALIGN(na->nla_len);
1018 * It is possible to have multiple messages for all
1019 * Infiniband devices in the system with appropriate name.
1020 * So we should gather parameters locally and copy to
1021 * query context only in case of coinciding device name.
1023 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
1024 data->flags = local.flags;
1025 data->ibindex = local.ibindex;
1026 data->ifindex = local.ifindex;
1027 data->portnum = local.portnum;
1028 data->state = local.state;
1037 * Get port info of network interface associated with some IB device.
1039 * This is the only somewhat safe method to avoid resorting to heuristics
1040 * when faced with port representors. Unfortunately it requires at least
1044 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1046 * IB device port index, starting from 1
1048 * Pointer to port info.
1050 * 0 on success, negative on error and rte_errno is set.
/*
 * Two step RDMA Netlink query. Step one dumps the IB devices so that the
 * callback can find the index of the device named data->name; step two
 * asks for port pindex of that device. Both replies are parsed by
 * mlx5_nl_cmdget_cb() into data.
 */
1053 mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data)
/* Room for the header plus the two attributes used in step two. */
1057 uint8_t buf[NLMSG_HDRLEN +
1058 NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) +
1059 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
/* Step one: a header only dump request. */
1062 .nlmsg_len = NLMSG_LENGTH(0),
1063 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1064 RDMA_NLDEV_CMD_GET),
1065 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1069 uint32_t sn = MLX5_NL_SN_GENERATE;
1072 ret = mlx5_nl_send(nl, &req.nh, sn);
1075 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
/* Step two needs both the name match and the device index. */
1078 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
1079 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX))
/* Step two: reuse the request as a PORT_GET with a new sequence number. */
1082 sn = MLX5_NL_SN_GENERATE;
1083 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1084 RDMA_NLDEV_CMD_PORT_GET);
1085 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1086 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
/* First attribute: the IB device index found in step one. */
1087 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1088 na->nla_len = NLA_HDRLEN + sizeof(data->ibindex);
1089 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1090 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1091 &data->ibindex, sizeof(data->ibindex));
/* Second attribute: the requested port index. */
1092 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1093 na->nla_len = NLA_HDRLEN + sizeof(pindex);
1094 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1095 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1096 &pindex, sizeof(pindex));
1097 ret = mlx5_nl_send(nl, &req.nh, sn);
1100 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
/* The port reply is validated through the flag bits set by the callback. */
1103 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
1104 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1105 !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1115 * Get index of network interface associated with some IB device.
1117 * This is the only somewhat safe method to avoid resorting to heuristics
1118 * when faced with port representors. Unfortunately it requires at least
1122 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1126 * IB device port index, starting from 1
1128 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
/*
 * Thin wrapper: run mlx5_nl_port_info() for the given port and hand back
 * only the network interface index it reported.
 */
1132 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1134 struct mlx5_nl_port_info data = {
/* A negative result from the query is the failure case. */
1139 if (mlx5_nl_port_info(nl, pindex, &data) < 0)
1141 return data.ifindex;
1145 * Get IB device port state.
1147 * This is the only somewhat safe method to get info for port number >= 255.
1148 * Unfortunately it requires at least Linux 4.17.
1151 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1155 * IB device port index, starting from 1
1157 * Port state (ibv_port_state) on success, negative on error
1158 * and rte_errno is set.
/*
 * Thin wrapper: run mlx5_nl_port_info() for the given port and return the
 * reported port state.
 */
1161 mlx5_nl_port_state(int nl, const char *name, uint32_t pindex)
1163 struct mlx5_nl_port_info data = {
1168 if (mlx5_nl_port_info(nl, pindex, &data) < 0)
/* A reply without the port state attribute is reported as ENOTSUP. */
1170 if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) {
1171 rte_errno = ENOTSUP;
1174 return (int)data.state;
1178 * Get the number of physical ports of given IB device.
1181 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1186 * A valid (nonzero) number of ports on success, 0 otherwise
1187 * and rte_errno is set.
/*
 * Dump the IB devices through RDMA Netlink and return the port number
 * attribute reported for the device matched by mlx5_nl_cmdget_cb().
 */
1190 mlx5_nl_portnum(int nl, const char *name)
1192 struct mlx5_nl_port_info data = {
/* Header only dump request, no attributes attached. */
1198 struct nlmsghdr req = {
1199 .nlmsg_len = NLMSG_LENGTH(0),
1200 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1201 RDMA_NLDEV_CMD_GET),
1202 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1204 uint32_t sn = MLX5_NL_SN_GENERATE;
1207 ret = mlx5_nl_send(nl, &req, sn);
1210 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
/* Name match, device index and port index must all have been seen. */
1213 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1214 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1215 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1221 return data.portnum;
1225 * Analyze gathered port parameters via Netlink to recognize master
1226 * and representor devices for E-Switch configuration.
1228 * @param[in] num_vf_set
1229 * flag of presence of number of VFs port attribute.
1230 * @param[inout] switch_info
1231 * Port information, including port name as a number and port name
1232 * type if recognized
1235 * master and representor flags are set in switch_info according to
1236 * recognized parameters (if any).
/*
 * Derive the master and representor flags of switch_info from the
 * recognised physical port name type and from whether the number of VFs
 * attribute was present.
 */
1239 mlx5_nl_check_switch_info(bool num_vf_set,
1240 struct mlx5_switch_info *switch_info)
1242 switch (switch_info->name_type) {
1243 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1245 * Name is not recognized, assume the master,
1246 * check the number of VFs key presence.
1248 switch_info->master = num_vf_set;
1250 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1252 * Name is not set, this assumes the legacy naming
1253 * schema for master, just check if there is a
1254 * number of VFs key.
1256 switch_info->master = num_vf_set;
1258 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1259 /* New uplink naming schema recognized. */
1260 switch_info->master = 1;
1262 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1263 /* Legacy representors naming schema. */
/* A legacy name counts as representor only when no VF count was seen. */
1264 switch_info->representor = !num_vf_set;
/* NOTE(review): the three PF name types below presumably all reach the
 * shared representor assignment - confirm. */
1266 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1268 case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1270 case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1271 /* New representors naming schema. */
1272 switch_info->representor = 1;
1278 * Process switch information from Netlink message.
1281 * Pointer to Netlink message header.
1283 * Opaque data pointer for this callback.
1286 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback for RTM_NEWLINK replies. It walks the link
 * attributes, translates the physical port name through
 * mlx5_translate_port_name(), accumulates the physical switch id one byte
 * at a time (first byte ends up most significant) and, when a switch id
 * was seen, classifies the port with mlx5_nl_check_switch_info(). The
 * gathered information is finally copied out through arg.
 */
1289 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1291 struct mlx5_switch_info info = {
1294 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
/* Attributes start after the Netlink header and the ifinfomsg. */
1298 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1299 bool switch_id_set = false;
1300 bool num_vf_set = false;
1303 if (nh->nlmsg_type != RTM_NEWLINK)
1305 while (off < nh->nlmsg_len) {
1306 struct rtattr *ra = (void *)((uintptr_t)nh + off);
1307 void *payload = RTA_DATA(ra);
/* Reject an attribute that claims to extend past the message end. */
1310 if (ra->rta_len > nh->nlmsg_len - off)
1312 switch (ra->rta_type) {
/*
 * Names that are empty or do not fit the local buffer are not translated.
 * NOTE(review): the UNKNOWN name type assignment further down presumably
 * belongs to that case - confirm.
 */
1316 case IFLA_PHYS_PORT_NAME:
1317 len = RTA_PAYLOAD(ra);
1318 /* Some kernels do not pad attributes with zero. */
1319 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) {
1320 char name[MLX5_PHYS_PORT_NAME_MAX];
1323 * We can't just patch the message with padding
1324 * zero - it might corrupt the following items
1325 * in the message, we have to copy the string
1326 * by attribute length and pad the copied one.
1328 memcpy(name, payload, len);
1330 mlx5_translate_port_name(name, &info);
1333 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1336 case IFLA_PHYS_SWITCH_ID:
1338 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1339 info.switch_id <<= 8;
1340 info.switch_id |= ((uint8_t *)payload)[i];
1342 switch_id_set = true;
1345 off += RTA_ALIGN(ra->rta_len);
1347 if (switch_id_set) {
1348 /* We have some E-Switch configuration. */
1349 mlx5_nl_check_switch_info(num_vf_set, &info);
1351 MLX5_ASSERT(!(info.master && info.representor));
1352 memcpy(arg, &info, sizeof(info));
1360 * Get switch information associated with network interface.
1363 * Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1365 * Network interface index.
1367 * Switch information object, populated in case of success.
1370 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Query link information of interface ifindex with RTM_GETLINK and let
 * mlx5_nl_switch_info_cb() fill the caller supplied info structure.
 */
1373 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1374 struct mlx5_switch_info *info)
1378 struct ifinfomsg info;
/* The message length accounts for one 32 bit attribute after ifinfomsg. */
1383 .nlmsg_len = NLMSG_LENGTH
1385 RTA_LENGTH(sizeof(uint32_t))),
1386 .nlmsg_type = RTM_GETLINK,
1387 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1390 .ifi_family = AF_UNSPEC,
1391 .ifi_index = ifindex,
/* NOTE(review): extended mask value 1 is presumably RTEXT_FILTER_VF, so
 * that the reply carries VF related attributes - confirm. */
1394 .rta_type = IFLA_EXT_MASK,
1395 .rta_len = RTA_LENGTH(sizeof(int32_t)),
1397 .extmask = RTE_LE32(1),
1399 uint32_t sn = MLX5_NL_SN_GENERATE;
1402 ret = mlx5_nl_send(nl, &req.nh, sn);
1404 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
/* A port cannot be master and representor at once; flag it as an error. */
1405 if (info->master && info->representor) {
1406 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1407 " and as representor", ifindex);
1415 * Delete VLAN network device by ifindex.
1418 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1419 * @param[in] ifindex
1420 * Interface index of network device to delete.
/*
 * Ask the kernel to delete the network device ifindex with RTM_DELLINK on
 * the Netlink socket kept in the workaround context.
 */
1423 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1426 uint32_t sn = MLX5_NL_SN_GENERATE;
1430 struct ifinfomsg info;
1433 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1434 .nlmsg_type = RTM_DELLINK,
1435 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1438 .ifi_family = AF_UNSPEC,
1439 .ifi_index = ifindex,
1444 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
/* No callback is registered for the reply. */
1446 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
/* Failure is reported at WARNING level together with the return code. */
1448 DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1449 " ifindex %u, %d", ifindex, ret);
1453 /* Set of subroutines to build Netlink message. */
/*
 * Return the address just past the current message content, that is the
 * header address plus the aligned nlmsg_len. New attributes go there.
 */
1454 static struct nlattr *
1455 nl_msg_tail(struct nlmsghdr *nlh)
1457 return (struct nlattr *)
1458 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
/*
 * Append one attribute of the given type with alen payload bytes at the
 * message tail and grow nlmsg_len accordingly. No buffer capacity is
 * passed in, so the caller must have sized the message buffer beforehand.
 */
1462 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
1464 struct nlattr *nla = nl_msg_tail(nlh);
1466 nla->nla_type = type;
/* nla_len holds the unpadded size; the message grows by the padded one. */
1467 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
1468 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len);
/* The payload is written right after the attribute header. */
1471 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
/*
 * Open a nested attribute: remember where it starts and append an
 * attribute header of the given type with an empty payload.
 */
1474 static struct nlattr *
1475 nl_attr_nest_start(struct nlmsghdr *nlh, int type)
/* Captured before the put, so it points at the new nest header. */
1477 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
1479 nl_attr_put(nlh, type, NULL, 0);
/*
 * Close a nested attribute by setting its length to the distance between
 * its header and the current message tail, covering everything appended
 * since nl_attr_nest_start().
 */
1484 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
1486 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
1490 * Create network VLAN device with specified VLAN tag.
1493 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1494 * @param[in] ifindex
1495 * Base network interface index.
1497 * VLAN tag for VLAN network device to create.
/*
 * Create a VLAN network device on top of interface ifindex carrying VLAN
 * id tag. The request is assembled in a stack buffer with the nl_attr_*
 * helpers, and the index of the resulting device is then looked up by
 * name.
 */
1500 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1501 uint32_t ifindex, uint16_t tag)
1503 struct nlmsghdr *nlh;
1504 struct ifinfomsg *ifm;
/* Prefix plus 32 bytes reserved for the dotted numeric suffixes. */
1505 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
/* Sized for both headers, eight attribute headers, the payloads and
 * 16 spare bytes. */
1508 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1509 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1510 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1511 NLMSG_ALIGN(sizeof(uint32_t)) +
1512 NLMSG_ALIGN(sizeof(name)) +
1513 NLMSG_ALIGN(sizeof("vlan")) +
1514 NLMSG_ALIGN(sizeof(uint32_t)) +
1515 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1516 struct nlattr *na_info;
1517 struct nlattr *na_vlan;
1518 uint32_t sn = MLX5_NL_SN_GENERATE;
1521 memset(buf, 0, sizeof(buf));
1522 nlh = (struct nlmsghdr *)buf;
1523 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1524 nlh->nlmsg_type = RTM_NEWLINK;
1525 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1526 NLM_F_EXCL | NLM_F_ACK;
/* The ifinfomsg sits directly after the Netlink header. */
1527 ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1528 nlh->nlmsg_len += sizeof(struct ifinfomsg);
1529 ifm->ifi_family = AF_UNSPEC;
1532 ifm->ifi_flags = IFF_UP;
1533 ifm->ifi_change = 0xffffffff;
/* IFLA_LINK names the base interface the VLAN device is stacked on. */
1534 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
/* Device name is the prefix, the base ifindex and the tag, dot separated. */
1535 ret = snprintf(name, sizeof(name), "%s.%u.%u",
1536 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
/* ret + 1 includes the terminating NUL in the attribute payload. */
1537 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
/* Nested layout: LINKINFO { INFO_KIND, INFO_DATA { VLAN_ID } }. */
1538 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1539 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1540 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1541 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1542 nl_attr_nest_end(nlh, na_vlan);
1543 nl_attr_nest_end(nlh, na_info);
/* Checked after the message is built: it detects an undersized buf
 * rather than preventing the overrun. */
1544 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1545 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1547 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1549 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1552 /* Try to get ifindex of created or pre-existing device. */
1553 ret = if_nametoindex(name);
1555 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1563 * Parse Netlink message to retrieve the general family ID.
1566 * Pointer to Netlink Message Header.
1568 * PMD data registered with this callback.
1571 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Netlink receive callback: scans the attributes that follow the generic
 * netlink header and, when CTRL_ATTR_FAMILY_ID is found, copies its 16-bit
 * payload into the uint16_t that arg points to.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1574 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
/* tail is nlmsg_len bytes past the start of the message header. */
1577 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
/* The first attribute sits after the aligned nlmsghdr and genlmsghdr. */
1578 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1579 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
/*
 * Step through the attributes by their aligned length; a zero nla_len also
 * ends the walk.
 * NOTE(review): nla->nla_len is read before nla is compared against tail,
 * so the header at tail may be read once - confirm this is acceptable.
 */
1581 for (; nla->nla_len && nla < tail;
1582 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) {
1583 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
/* The payload begins right after the attribute header (nla + 1). */
1584 *(uint16_t *)arg = *(uint16_t *)(nla + 1);
1591 #define MLX5_NL_MAX_ATTR_SIZE 100
1593 * Get generic netlink family ID.
1595 * @param[in] nlsk_fd
1596 * Netlink socket file descriptor.
1601 * ID >= 0 on success, a negative errno value
1602 * otherwise and rte_errno is set.
/*
 * Builds a GENL_ID_CTRL / CTRL_CMD_GETFAMILY request carrying the family
 * name, sends it on nlsk_fd and parses the reply with mlx5_nl_family_id_cb.
 * NOTE(review): the declarations of ret and id and the return statements
 * are not visible in this excerpt.
 */
1605 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1607 struct nlmsghdr *nlh;
1608 struct genlmsghdr *genl;
1609 uint32_t sn = MLX5_NL_SN_GENERATE;
/* The attribute length includes the terminating NUL of the name. */
1610 int name_size = strlen(name) + 1;
/*
 * Request buffer: netlink header, generic netlink header, one attribute
 * header and up to MLX5_NL_MAX_ATTR_SIZE bytes of attribute payload.
 * NOTE(review): no visible line checks name_size against
 * MLX5_NL_MAX_ATTR_SIZE - confirm callers only pass short names.
 */
1613 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1614 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1615 NLMSG_ALIGN(sizeof(struct nlattr)) +
1616 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1618 memset(buf, 0, sizeof(buf));
1619 nlh = (struct nlmsghdr *)buf;
/* nlmsg_len grows as the generic header and the attribute are appended. */
1620 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1621 nlh->nlmsg_type = GENL_ID_CTRL;
1622 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* The generic netlink header is placed at the current message tail. */
1623 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1624 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1625 genl->cmd = CTRL_CMD_GETFAMILY;
1627 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1628 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
/* The callback stores the family ID found in the reply into id. */
1630 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1632 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1636 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1641 * Get Devlink family ID.
1643 * @param[in] nlsk_fd
1644 * Netlink socket file descriptor.
1647 * ID >= 0 on success, a negative errno value
1648 * otherwise and rte_errno is set.
/*
 * Thin wrapper: resolves the family ID for DEVLINK_GENL_NAME by delegating
 * to mlx5_nl_generic_family_id_get() and returns its result unchanged.
 */
1652 mlx5_nl_devlink_family_id_get(int nlsk_fd)
1654 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME);
1658 * Parse Netlink message to retrieve the ROCE enable status.
1661 * Pointer to Netlink Message Header.
1663 * PMD data registered with this callback.
1666 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Netlink receive callback for the devlink parameter reply: walks the
 * attributes that follow the generic netlink header and dispatches on the
 * attribute type.
 * NOTE(review): the bodies of the switch cases and the return statements
 * are not visible in this excerpt, so how arg is updated cannot be
 * confirmed from here.
 */
1669 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
/* tail is nlmsg_len bytes past the start of the message header. */
1674 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
/* The first attribute sits after the aligned nlmsghdr and genlmsghdr. */
1675 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1676 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
/*
 * A zero nla_len ends the walk.
 * NOTE(review): nla->nla_len is read before nla is compared against tail.
 */
1678 while (nla->nla_len && nla < tail) {
1679 switch (nla->nla_type) {
1680 /* Expected nested attributes case. */
1681 case DEVLINK_ATTR_PARAM:
1682 case DEVLINK_ATTR_PARAM_VALUES_LIST:
1683 case DEVLINK_ATTR_PARAM_VALUE:
1687 case DEVLINK_ATTR_PARAM_VALUE_DATA:
/* Advance to the next attribute by the aligned length of this one. */
1691 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len));
1699 * Get ROCE enable status through Netlink.
1701 * @param[in] nlsk_fd
1702 * Netlink socket file descriptor.
1703 * @param[in] family_id
1704 * the Devlink family ID.
1706 * The device PCI address.
1707 * @param[out] enable
1708 * Where to store the enable status.
1711 * 0 on success and @p enable is updated, a negative errno value otherwise
1712 * and rte_errno is set.
/*
 * Sends a DEVLINK_CMD_PARAM_GET request for the "enable_roce" parameter of
 * the PCI device pci_addr and parses the reply with mlx5_nl_roce_cb.
 * NOTE(review): the rest of the parameter list, the declarations of ret and
 * cur_en, and the return statements are not visible in this excerpt.
 */
1715 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1718 struct nlmsghdr *nlh;
1719 struct genlmsghdr *genl;
1720 uint32_t sn = MLX5_NL_SN_GENERATE;
/*
 * Request buffer: netlink header, generic netlink header and room for four
 * attributes of up to MLX5_NL_MAX_ATTR_SIZE bytes each (three are added).
 */
1723 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1724 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1725 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1726 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1728 memset(buf, 0, sizeof(buf));
1729 nlh = (struct nlmsghdr *)buf;
1730 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1731 nlh->nlmsg_type = family_id;
1732 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* The generic netlink header is placed at the current message tail. */
1733 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1734 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1735 genl->cmd = DEVLINK_CMD_PARAM_GET;
1736 genl->version = DEVLINK_GENL_VERSION;
/* The literal lengths 4 and 12 count the terminating NUL of each string. */
1737 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1738 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1739 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1740 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
/* The callback receives a pointer to cur_en as its argument. */
1742 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1744 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1749 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1750 cur_en ? "en" : "dis", pci_addr);
1755 * Reload mlx5 device kernel driver through Netlink.
1757 * @param[in] nlsk_fd
1758 * Netlink socket file descriptor.
1759 * @param[in] family_id
1760 * the Devlink family ID.
1762 * The device PCI address.
1763 * @param[out] enable
1764 * The enable status to set.
1767 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Sends a DEVLINK_CMD_RELOAD request for the PCI device pci_addr and waits
 * for the reply; no parsing callback is passed to mlx5_nl_recv().
 * NOTE(review): the header comment above documents an "enable" parameter
 * that this signature does not take. The declaration of ret and the return
 * statements are not visible in this excerpt.
 */
1770 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1772 struct nlmsghdr *nlh;
1773 struct genlmsghdr *genl;
1774 uint32_t sn = MLX5_NL_SN_GENERATE;
/*
 * Request buffer: netlink header, generic netlink header and two attributes
 * of up to MLX5_NL_MAX_ATTR_SIZE bytes each.
 */
1776 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1777 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1778 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1779 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1781 memset(buf, 0, sizeof(buf));
1782 nlh = (struct nlmsghdr *)buf;
1783 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1784 nlh->nlmsg_type = family_id;
1785 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* The generic netlink header is placed at the current message tail. */
1786 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1787 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1788 genl->cmd = DEVLINK_CMD_RELOAD;
1789 genl->version = DEVLINK_GENL_VERSION;
/* The literal length 4 counts the terminating NUL of "pci". */
1790 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1791 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1792 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1794 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1796 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1800 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1806 * Set ROCE enable status through Netlink.
1808 * @param[in] nlsk_fd
1809 * Netlink socket file descriptor.
1810 * @param[in] family_id
1811 * the Devlink family ID.
1813 * The device PCI address.
1814 * @param[in] enable
1815 * The enable status to set.
1818 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Sends a DEVLINK_CMD_PARAM_SET request for the "enable_roce" parameter of
 * the PCI device pci_addr, then reloads the driver through
 * mlx5_nl_driver_reload() and returns that call's result.
 * NOTE(review): the rest of the parameter list, the declaration of ret and
 * the error-path return are not visible in this excerpt.
 */
1821 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1824 struct nlmsghdr *nlh;
1825 struct genlmsghdr *genl;
1826 uint32_t sn = MLX5_NL_SN_GENERATE;
/*
 * Request buffer: netlink header, generic netlink header and six attributes
 * of up to MLX5_NL_MAX_ATTR_SIZE bytes each.
 */
1828 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1829 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1830 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1831 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
/* Single-byte attribute payloads: configuration mode and parameter type. */
1832 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1833 uint8_t ptype = NLA_FLAG;
1836 memset(buf, 0, sizeof(buf));
1837 nlh = (struct nlmsghdr *)buf;
1838 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1839 nlh->nlmsg_type = family_id;
1840 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* The generic netlink header is placed at the current message tail. */
1841 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1842 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1843 genl->cmd = DEVLINK_CMD_PARAM_SET;
1844 genl->version = DEVLINK_GENL_VERSION;
/* The literal lengths 4 and 12 count the terminating NUL of each string. */
1845 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1846 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1847 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1848 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1849 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
/*
 * The value attribute is added with no payload (NULL, length 0).
 * NOTE(review): the line preceding this call is not visible; presumably it
 * makes the attribute conditional on enable - confirm.
 */
1851 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1852 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
/* No parsing callback is passed for the reply. */
1854 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1856 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1857 " %d.", enable ? "en" : "dis", pci_addr, ret);
1860 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1861 pci_addr, enable ? "en" : "dis");
1862 /* Now, need to reload the driver. */
1863 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);