1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
11 #include <rdma/rdma_netlink.h>
17 #include <sys/socket.h>
20 #include <rte_errno.h>
23 #include "../mlx5_common_log.h"
24 #include "mlx5_malloc.h"
26 #include <linux/devlink.h>
30 /* Size of the buffer to receive kernel messages */
31 #define MLX5_NL_BUF_SIZE (32 * 1024)
32 /* Send buffer size for the Netlink socket */
33 #define MLX5_SEND_BUF_SIZE 32768
34 /* Receive buffer size for the Netlink socket */
35 #define MLX5_RECV_BUF_SIZE 32768
36 /* Maximal physical port name length. */
37 #define MLX5_PHYS_PORT_NAME_MAX 128
39 /** Parameters of VLAN devices created by driver. */
40 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
42 * Define NDA_RTA as defined in iproute2 sources.
44 * see in iproute2 sources file include/libnetlink.h
47 #define MLX5_NDA_RTA(r) \
48 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
51 * Define NLMSG_TAIL as defined in iproute2 sources.
53 * see in iproute2 sources file include/libnetlink.h
56 #define NLMSG_TAIL(nmsg) \
57 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
60 * The following definitions are normally found in rdma/rdma_netlink.h,
61 * however they are so recent that most systems do not expose them yet.
63 #ifndef HAVE_RDMA_NL_NLDEV
64 #define RDMA_NL_NLDEV 5
66 #ifndef HAVE_RDMA_NLDEV_CMD_GET
67 #define RDMA_NLDEV_CMD_GET 1
69 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
70 #define RDMA_NLDEV_CMD_PORT_GET 5
72 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
73 #define RDMA_NLDEV_ATTR_DEV_INDEX 1
75 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
76 #define RDMA_NLDEV_ATTR_DEV_NAME 2
78 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
79 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
81 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
82 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
85 /* These are normally found in linux/if_link.h. */
86 #ifndef HAVE_IFLA_NUM_VF
87 #define IFLA_NUM_VF 21
89 #ifndef HAVE_IFLA_EXT_MASK
90 #define IFLA_EXT_MASK 29
92 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
93 #define IFLA_PHYS_SWITCH_ID 36
95 #ifndef HAVE_IFLA_PHYS_PORT_NAME
96 #define IFLA_PHYS_PORT_NAME 38
100 * Some Devlink defines may be missing in old kernel versions,
101 * adjust used defines.
103 #ifndef DEVLINK_GENL_NAME
104 #define DEVLINK_GENL_NAME "devlink"
106 #ifndef DEVLINK_GENL_VERSION
107 #define DEVLINK_GENL_VERSION 1
109 #ifndef DEVLINK_ATTR_BUS_NAME
110 #define DEVLINK_ATTR_BUS_NAME 1
112 #ifndef DEVLINK_ATTR_DEV_NAME
113 #define DEVLINK_ATTR_DEV_NAME 2
115 #ifndef DEVLINK_ATTR_PARAM
116 #define DEVLINK_ATTR_PARAM 80
118 #ifndef DEVLINK_ATTR_PARAM_NAME
119 #define DEVLINK_ATTR_PARAM_NAME 81
121 #ifndef DEVLINK_ATTR_PARAM_TYPE
122 #define DEVLINK_ATTR_PARAM_TYPE 83
124 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
125 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
127 #ifndef DEVLINK_ATTR_PARAM_VALUE
128 #define DEVLINK_ATTR_PARAM_VALUE 85
130 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
131 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
133 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
134 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
136 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
137 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
139 #ifndef DEVLINK_CMD_RELOAD
140 #define DEVLINK_CMD_RELOAD 37
142 #ifndef DEVLINK_CMD_PARAM_GET
143 #define DEVLINK_CMD_PARAM_GET 38
145 #ifndef DEVLINK_CMD_PARAM_SET
146 #define DEVLINK_CMD_PARAM_SET 39
152 /* Add/remove MAC address through Netlink */
153 struct mlx5_nl_mac_addr {
154 struct rte_ether_addr (*mac)[];
155 /**< MAC address handled by the device. */
156 int mac_n; /**< Number of addresses in the array. */
/*
 * Bit flags accumulated by mlx5_nl_cmdget_cb() into the flags field of
 * struct mlx5_nl_ifindex_data; one bit per attribute found in a reply:
 * device name match, IB device index, net device index, port index.
 */
159 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
160 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
161 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
162 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
164 /** Data structure used by mlx5_nl_cmdget_cb(). */
165 struct mlx5_nl_ifindex_data {
166 const char *name; /**< IB device name (in). */
167 uint32_t flags; /**< found attribute flags (out). */
168 uint32_t ibindex; /**< IB device index (out). */
169 uint32_t ifindex; /**< Network interface index (out). */
170 uint32_t portnum; /**< IB device max port number (out). */
175 /* Generate Netlink sequence number. */
/*
 * Atomically adds one to atomic_sn with relaxed ordering and evaluates to
 * the incremented value, so every expansion yields a new sequence number.
 */
176 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED)
179 * Opens a Netlink socket.
182 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
185 * A file descriptor on success, a negative errno value otherwise and
/*
 * Visible flow: create a raw, close-on-exec AF_NETLINK socket for the given
 * protocol, inspect its send and receive buffer sizes, then bind it to the
 * local Netlink address.
 */
189 mlx5_nl_init(int protocol)
194 struct sockaddr_nl local = {
195 .nl_family = AF_NETLINK,
199 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
/* opt_size is an in/out argument: size of buf_size in, bytes written out. */
204 opt_size = sizeof(buf_size);
205 ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size);
210 DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size);
/*
 * NOTE(review): buf_size has just been found to be smaller than
 * MLX5_SEND_BUF_SIZE and is passed back to setsockopt() unchanged, so the
 * value requested is the one that was read, not MLX5_SEND_BUF_SIZE.
 * Confirm whether the target size was meant to be requested here.
 */
211 if (buf_size < MLX5_SEND_BUF_SIZE) {
212 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
213 &buf_size, sizeof(buf_size));
/* Same inspection for the receive buffer. */
219 opt_size = sizeof(buf_size);
220 ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size);
225 DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size);
/*
 * NOTE(review): same pattern as for SO_SNDBUF above - the undersized value
 * is written back as is; confirm against MLX5_RECV_BUF_SIZE.
 */
226 if (buf_size < MLX5_RECV_BUF_SIZE) {
227 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
228 &buf_size, sizeof(buf_size));
/* Bind to the local address initialised above (family AF_NETLINK). */
234 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
246 * Send a request message to the kernel on the Netlink socket.
249 * Netlink socket file descriptor.
251 * The Netlink message sent to the kernel.
255 * Pointer to the request structure.
257 * Length of the request in bytes.
260 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Sends the Netlink header and the request payload as two separate
 * scatter/gather segments in a single sendmsg() call.
 */
264 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
/* Destination address: only the family is set in the visible initialiser. */
267 struct sockaddr_nl sa = {
268 .nl_family = AF_NETLINK,
/* Segment 0 is the fixed-size header, segment 1 is len bytes of req. */
270 struct iovec iov[2] = {
271 { .iov_base = nh, .iov_len = sizeof(*nh), },
272 { .iov_base = req, .iov_len = len, },
274 struct msghdr msg = {
276 .msg_namelen = sizeof(sa),
282 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
284 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative result from sendmsg() is the failure case. */
285 if (send_bytes < 0) {
293 * Send a message to the kernel on the Netlink socket.
296 * The Netlink socket file descriptor used for communication.
298 * The Netlink message sent to the kernel.
303 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Sends an already assembled Netlink message; unlike mlx5_nl_request() the
 * transmitted length is taken from the header itself (nh->nlmsg_len).
 */
307 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
309 struct sockaddr_nl sa = {
310 .nl_family = AF_NETLINK,
/* The caller must have set nlmsg_len to cover header plus payload. */
314 .iov_len = nh->nlmsg_len,
316 struct msghdr msg = {
318 .msg_namelen = sizeof(sa),
324 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
326 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative result from sendmsg() is the failure case. */
327 if (send_bytes < 0) {
335 * Receive a message from the kernel on the Netlink socket, following
339 * The Netlink socket file descriptor used for communication.
343 * The callback function to call for each Netlink message received.
344 * @param[in, out] arg
345 * Custom arguments for the callback.
348 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Visible flow: read datagrams until one whose first header carries the
 * expected sequence number sn arrives, then walk every Netlink message in
 * that datagram, handling NLMSG_ERROR and multi-part NLMSG_DONE specially.
 */
351 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
354 struct sockaddr_nl sa;
356 struct msghdr msg = {
358 .msg_namelen = sizeof(sa),
360 /* One message at a time */
372 /* Query length of incoming message. */
/*
 * MSG_PEEK leaves the datagram queued; MSG_TRUNC makes recvmsg() report the
 * full datagram length even when it exceeds the supplied buffer.
 */
375 recv_bytes = recvmsg(nlsk_fd, &msg,
376 MSG_PEEK | MSG_TRUNC);
377 if (recv_bytes < 0) {
382 if (recv_bytes == 0) {
387 /* Allocate buffer to fetch the message. */
/* The buffer is never smaller than MLX5_RECV_BUF_SIZE. */
388 if (recv_bytes < MLX5_RECV_BUF_SIZE)
389 recv_bytes = MLX5_RECV_BUF_SIZE;
391 buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY);
397 /* Fetch the message. */
399 iov.iov_len = recv_bytes;
400 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
401 if (recv_bytes == -1) {
406 nh = (struct nlmsghdr *)buf;
/* Datagrams answering another request (different sequence) are skipped. */
407 } while (nh->nlmsg_seq != sn);
/* NLMSG_OK / NLMSG_NEXT iterate the messages packed into the datagram. */
409 NLMSG_OK(nh, (unsigned int)recv_bytes);
410 nh = NLMSG_NEXT(nh, recv_bytes)) {
411 if (nh->nlmsg_type == NLMSG_ERROR) {
412 struct nlmsgerr *err_data = NLMSG_DATA(nh);
/* The kernel reports a negative errno; rte_errno stores it positive. */
414 if (err_data->error < 0) {
415 rte_errno = -err_data->error;
423 /* Multi-part msgs and their trailing DONE message. */
424 if (nh->nlmsg_flags & NLM_F_MULTI) {
425 if (nh->nlmsg_type == NLMSG_DONE) {
444 * Parse Netlink message to retrieve the bridge MAC address.
447 * Pointer to Netlink Message Header.
449 * PMD data registered with this callback.
452 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Callback for neighbour replies: scans the attributes that follow the
 * ndmsg header and appends every NDA_LLADDR payload to the caller array.
 */
455 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
457 struct mlx5_nl_mac_addr *data = arg;
458 struct ndmsg *r = NLMSG_DATA(nh);
459 struct rtattr *attribute;
/* Bytes of attribute data left after the Netlink and ndmsg headers. */
462 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
463 for (attribute = MLX5_NDA_RTA(r);
464 RTA_OK(attribute, len);
465 attribute = RTA_NEXT(attribute, len)) {
466 if (attribute->rta_type == NDA_LLADDR) {
/*
 * The capacity limit is the constant MLX5_MAX_MAC_ADDRESSES, so the array
 * behind data->mac must provide at least that many entries.
 */
467 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
469 "not enough room to finalize the"
474 #ifdef RTE_LIBRTE_MLX5_DEBUG
475 char m[RTE_ETHER_ADDR_FMT_SIZE];
477 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
478 RTA_DATA(attribute));
479 DRV_LOG(DEBUG, "bridge MAC address %s", m);
/*
 * Store the address in the next free slot and advance the count.
 * NOTE(review): RTE_ETHER_ADDR_LEN bytes are copied with no visible check
 * that the attribute payload is that long - confirm.
 */
481 memcpy(&(*data->mac)[data->mac_n++],
482 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
489 * Get bridge MAC addresses.
492 * Netlink socket file descriptor.
493 * @param[in] iface_idx
494 * Net device interface index.
496 * Pointer to the array table of MAC addresses to fill.
497 * Its size should be of MLX5_MAX_MAC_ADDRESSES.
499 * Number of entries filled in MAC array.
502 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Issues an RTM_GETNEIGH dump for the bridge family on one interface and
 * lets mlx5_nl_mac_addr_cb() collect the returned addresses.
 */
505 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
506 struct rte_ether_addr (*mac)[], int *mac_n)
510 struct ifinfomsg ifm;
513 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
514 .nlmsg_type = RTM_GETNEIGH,
/* NLM_F_DUMP asks for the whole table rather than a single entry. */
515 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
518 .ifi_family = PF_BRIDGE,
519 .ifi_index = iface_idx,
522 struct mlx5_nl_mac_addr data = {
526 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Header and ifinfomsg payload are passed separately to the sender. */
531 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
532 sizeof(struct ifinfomsg));
535 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
541 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
542 iface_idx, strerror(rte_errno));
547 * Modify the MAC address neighbour table with Netlink.
550 * Netlink socket file descriptor.
551 * @param[in] iface_idx
552 * Net device interface index.
554 * MAC address to consider.
556 * 1 to add the MAC address, 0 to remove the MAC address.
559 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Builds one neighbour request (RTM_NEWNEIGH when add is nonzero,
 * RTM_DELNEIGH otherwise) carrying a single NDA_LLADDR attribute, sends it
 * and waits for the acknowledgement.
 */
562 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
563 struct rte_ether_addr *mac, int add)
/* Storage for the attribute payload (one Ethernet address). */
569 uint8_t buffer[RTE_ETHER_ADDR_LEN];
572 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
/* The same flag set, including CREATE and EXCL, is used for add and remove. */
573 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
574 NLM_F_EXCL | NLM_F_ACK,
575 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
578 .ndm_family = PF_BRIDGE,
579 .ndm_state = NUD_NOARP | NUD_PERMANENT,
580 .ndm_ifindex = iface_idx,
581 .ndm_flags = NTF_SELF,
584 .rta_type = NDA_LLADDR,
585 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
588 uint32_t sn = MLX5_NL_SN_GENERATE;
/*
 * The address is written right after the attribute header; presumably the
 * buffer member directly follows rta in the request layout - confirm.
 */
593 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
/* Grow the message length to include the aligned attribute. */
594 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
595 RTA_ALIGN(req.rta.rta_len);
596 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
/* No callback: only the acknowledgement or error status is consumed. */
599 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
604 #ifdef RTE_LIBRTE_MLX5_DEBUG
606 char m[RTE_ETHER_ADDR_FMT_SIZE];
608 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
610 "Interface %u cannot %s MAC address %s %s",
612 add ? "add" : "remove", m, strerror(rte_errno));
619 * Modify the VF MAC address neighbour table with Netlink.
622 * Netlink socket file descriptor.
623 * @param[in] iface_idx
624 * Net device interface index.
626 * MAC address to consider.
631 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Builds a link request whose payload is three attributes, each enclosing
 * the next (IFLA_VFINFO_LIST, IFLA_VF_INFO, IFLA_VF_MAC), sends it and waits
 * for the acknowledgement.
 */
634 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
635 struct rte_ether_addr *mac, int vf_index)
640 struct ifinfomsg ifm;
641 struct rtattr vf_list_rta;
642 struct rtattr vf_info_rta;
643 struct rtattr vf_mac_rta;
644 struct ifla_vf_mac ivm;
647 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
648 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
/*
 * NOTE(review): RTM_BASE is presumably numerically equal to RTM_NEWLINK in
 * linux/rtnetlink.h; confirm the intended message type.
 */
649 .nlmsg_type = RTM_BASE,
652 .ifi_index = iface_idx,
655 .rta_type = IFLA_VFINFO_LIST,
656 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
659 .rta_type = IFLA_VF_INFO,
660 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
663 .rta_type = IFLA_VF_MAC,
666 struct ifla_vf_mac ivm = {
669 uint32_t sn = MLX5_NL_SN_GENERATE;
671 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
/*
 * The local ivm is copied to the bytes following vf_mac_rta; presumably
 * that is the ivm member of the request - confirm the layout has no padding.
 */
672 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
674 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
/* Total length first: header plus the three aligned attributes. */
675 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
676 RTA_ALIGN(req.vf_list_rta.rta_len) +
677 RTA_ALIGN(req.vf_info_rta.rta_len) +
678 RTA_ALIGN(req.vf_mac_rta.rta_len);
/*
 * Then each enclosing attribute is resized to reach the message tail, which
 * makes it contain the attributes that follow it.
 */
679 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
681 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
686 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
/* No callback: only the acknowledgement or error status is consumed. */
689 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
695 "representor %u cannot set VF MAC address "
696 "%02X:%02X:%02X:%02X:%02X:%02X : %s",
698 mac->addr_bytes[0], mac->addr_bytes[1],
699 mac->addr_bytes[2], mac->addr_bytes[3],
700 mac->addr_bytes[4], mac->addr_bytes[5],
701 strerror(rte_errno));
709 * Netlink socket file descriptor.
710 * @param[in] iface_idx
711 * Net device interface index.
713 * BITFIELD_DECLARE array to store the mac.
715 * MAC address to register.
720 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Adds the address through mlx5_nl_mac_addr_modify() and records in the
 * mac_own bitfield that slot index was added by this code, which is what
 * mlx5_nl_mac_addr_flush() later consults.
 */
723 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
724 uint64_t *mac_own, struct rte_ether_addr *mac,
/* Last argument 1 selects the add operation. */
729 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
/* The index is checked by assertion and again at run time. */
731 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
732 if (index >= MLX5_MAX_MAC_ADDRESSES)
735 BITFIELD_SET(mac_own, index);
743 * Remove a MAC address.
746 * Netlink socket file descriptor.
747 * @param[in] iface_idx
748 * Net device interface index.
750 * BITFIELD_DECLARE array to store the mac.
752 * MAC address to remove.
757 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Clears the ownership bit for slot index and asks the kernel to remove the
 * address; the result of the kernel request is returned to the caller.
 */
760 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
761 struct rte_ether_addr *mac, uint32_t index)
/* The index is checked by assertion and again at run time. */
763 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
764 if (index >= MLX5_MAX_MAC_ADDRESSES)
/*
 * NOTE(review): the bit is reset before the kernel call, so it stays
 * cleared even when mlx5_nl_mac_addr_modify() fails - confirm intended.
 */
767 BITFIELD_RESET(mac_own, index);
/* Last argument 0 selects the remove operation. */
768 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
772 * Synchronize Netlink bridge table to the internal table.
775 * Netlink socket file descriptor.
776 * @param[in] iface_idx
777 * Net device interface index.
779 * Mac addresses array to sync.
781 * @p mac_addrs array size.
/*
 * Fetches the bridge table addresses and copies every address not yet in
 * mac_addrs into the first zero slot of the matching region: multicast
 * addresses from index MLX5_MAX_UC_MAC_ADDRESSES up to n, the others from
 * index 0 up to MLX5_MAX_UC_MAC_ADDRESSES.
 */
784 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
785 struct rte_ether_addr *mac_addrs, int n)
/*
 * Variable-length array sized by n.
 * NOTE(review): mlx5_nl_mac_addr_cb() stops at MLX5_MAX_MAC_ADDRESSES
 * entries, not at n, and the unicast scan below runs up to
 * MLX5_MAX_UC_MAC_ADDRESSES regardless of n; presumably callers always pass
 * n == MLX5_MAX_MAC_ADDRESSES - confirm.
 */
787 struct rte_ether_addr macs[n];
792 memset(macs, 0, n * sizeof(macs[0]));
793 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
796 for (i = 0; i != macs_n; ++i) {
799 /* Verify the address is not in the array yet. */
800 for (j = 0; j != n; ++j)
801 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
805 if (rte_is_multicast_ether_addr(&macs[i])) {
806 /* Find the first entry available. */
807 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
808 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
809 mac_addrs[j] = macs[i];
814 /* Find the first entry available. */
815 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
816 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
817 mac_addrs[j] = macs[i];
826 * Flush all added MAC addresses.
829 * Netlink socket file descriptor.
830 * @param[in] iface_idx
831 * Net device interface index.
832 * @param[in] mac_addrs
833 * Mac addresses array to flush.
835 * @p mac_addrs array size.
837 * BITFIELD_DECLARE array to store the mac.
/*
 * Walks mac_addrs from the last entry to the first and removes, through
 * mlx5_nl_mac_addr_remove(), every address whose bit is set in mac_own.
 */
840 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
841 struct rte_ether_addr *mac_addrs, int n,
/* Reject sizes outside 1..MLX5_MAX_MAC_ADDRESSES. */
846 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
849 for (i = n - 1; i >= 0; --i) {
850 struct rte_ether_addr *m = &mac_addrs[i];
/* Only addresses recorded as added by this code are removed. */
852 if (BITFIELD_ISSET(mac_own, i))
853 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
859 * Enable promiscuous / all multicast mode through Netlink.
862 * Netlink socket file descriptor.
863 * @param[in] iface_idx
864 * Net device interface index.
866 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
868 * Nonzero to enable, disable otherwise.
871 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Builds an RTM_NEWLINK request that carries the requested interface flag
 * bits when enable is nonzero and zero otherwise, then sends it.
 */
874 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
879 struct ifinfomsg ifi;
882 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
883 .nlmsg_type = RTM_NEWLINK,
/* No acknowledgement is requested (NLM_F_ACK is not set). */
884 .nlmsg_flags = NLM_F_REQUEST,
887 .ifi_flags = enable ? flags : 0,
889 .ifi_index = iface_idx,
892 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Only IFF_PROMISC and IFF_ALLMULTI are accepted in flags. */
895 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
898 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
905 * Enable promiscuous mode through Netlink.
908 * Netlink socket file descriptor.
909 * @param[in] iface_idx
910 * Net device interface index.
912 * Nonzero to enable, disable otherwise.
915 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Thin wrapper: forwards to mlx5_nl_device_flags() with IFF_PROMISC and
 * logs a message naming the interface and the requested action.
 */
918 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
920 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
/* The text is built from rte_errno as left by the failed request. */
924 "Interface %u cannot %s promisc mode: Netlink error %s",
925 iface_idx, enable ? "enable" : "disable",
926 strerror(rte_errno));
931 * Enable all multicast mode through Netlink.
934 * Netlink socket file descriptor.
935 * @param[in] iface_idx
936 * Net device interface index.
938 * Nonzero to enable, disable otherwise.
941 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Thin wrapper: forwards to mlx5_nl_device_flags() with IFF_ALLMULTI and
 * logs a message naming the interface and the requested action.
 */
944 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
946 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
/* The text is built from rte_errno as left by the failed request. */
951 "Interface %u cannot %s allmulti : Netlink error %s",
952 iface_idx, enable ? "enable" : "disable",
953 strerror(rte_errno));
958 * Process network interface information from Netlink message.
961 * Pointer to Netlink message header.
963 * Opaque data pointer for this callback.
966 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Callback for RDMA nldev replies (device GET and PORT_GET): walks the
 * attributes of one message, gathers the values of interest in a local
 * copy and publishes them to the query context only when the device name
 * attribute matched data->name.
 */
969 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
971 struct mlx5_nl_ifindex_data *data = arg;
972 struct mlx5_nl_ifindex_data local = {
/* Attributes start immediately after the Netlink header. */
975 size_t off = NLMSG_HDRLEN;
/* Messages of any other type are not handled by this callback. */
977 if (nh->nlmsg_type !=
978 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
980 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
982 while (off < nh->nlmsg_len) {
983 struct nlattr *na = (void *)((uintptr_t)nh + off);
984 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
/*
 * Reject an attribute that claims to extend past the end of the message.
 * NOTE(review): a zero nla_len passes this test and would not advance off
 * below; payload sizes are also not compared with the 32-bit reads and the
 * strcmp() relies on a terminated string - confirm the kernel guarantees.
 */
986 if (na->nla_len > nh->nlmsg_len - off)
988 switch (na->nla_type) {
989 case RDMA_NLDEV_ATTR_DEV_INDEX:
990 local.ibindex = *(uint32_t *)payload;
991 local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
993 case RDMA_NLDEV_ATTR_DEV_NAME:
994 if (!strcmp(payload, data->name))
995 local.flags |= MLX5_NL_CMD_GET_IB_NAME;
997 case RDMA_NLDEV_ATTR_NDEV_INDEX:
998 local.ifindex = *(uint32_t *)payload;
999 local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
1001 case RDMA_NLDEV_ATTR_PORT_INDEX:
1002 local.portnum = *(uint32_t *)payload;
1003 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
/* Step to the next attribute, honouring attribute alignment. */
1008 off += NLA_ALIGN(na->nla_len);
1011 * It is possible to have multiple messages for all
1012 * Infiniband devices in the system with appropriate name.
1013 * So we should gather parameters locally and copy to
1014 * query context only in case of coinciding device name.
1016 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
1017 data->flags = local.flags;
1018 data->ibindex = local.ibindex;
1019 data->ifindex = local.ifindex;
1020 data->portnum = local.portnum;
1029 * Get index of network interface associated with some IB device.
1031 * This is the only somewhat safe method to avoid resorting to heuristics
1032 * when faced with port representors. Unfortunately it requires at least
1036 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1040 * IB device port index, starting from 1
1042 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
/*
 * Two-pass query. Pass one dumps all RDMA devices to learn the IB device
 * index that belongs to name. Pass two sends a PORT_GET for that device
 * index and port pindex and returns the net device index from the reply.
 */
1046 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1048 struct mlx5_nl_ifindex_data data = {
1051 .ibindex = 0, /* Determined during first pass. */
1052 .ifindex = 0, /* Determined during second pass. */
/* Room for the header plus two attributes holding one 32-bit value each. */
1056 uint8_t buf[NLMSG_HDRLEN +
1057 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
1058 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1061 .nlmsg_len = NLMSG_LENGTH(0),
1062 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1063 RDMA_NLDEV_CMD_GET),
1064 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1068 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Pass one: header-only dump request. */
1071 ret = mlx5_nl_send(nl, &req.nh, sn);
1074 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
/* The device must have been found by name and reported its index. */
1077 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1078 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
/* Pass two: new sequence number, no dump flag, two attributes appended. */
1081 sn = MLX5_NL_SN_GENERATE;
1082 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1083 RDMA_NLDEV_CMD_PORT_GET);
1084 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1085 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
/*
 * Attributes are written into req.buf past the header area; presumably
 * req.nh and req.buf share storage (union) - confirm.
 */
1086 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1087 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
1088 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1089 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1090 &data.ibindex, sizeof(data.ibindex));
1091 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1092 na->nla_len = NLA_HDRLEN + sizeof(pindex);
1093 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1094 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1095 &pindex, sizeof(pindex));
1096 ret = mlx5_nl_send(nl, &req.nh, sn);
1099 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
/* All listed attributes must be present before ifindex is trusted. */
1102 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1103 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1104 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1107 return data.ifindex;
1114 * Get the number of physical ports of given IB device.
1117 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1122 * A valid (nonzero) number of ports on success, 0 otherwise
1123 * and rte_errno is set.
/*
 * Dumps all RDMA devices and returns the port number reported for the
 * device whose name matches; uses the same callback as mlx5_nl_ifindex().
 */
1126 mlx5_nl_portnum(int nl, const char *name)
1128 struct mlx5_nl_ifindex_data data = {
/* Header-only request: no attributes are attached. */
1134 struct nlmsghdr req = {
1135 .nlmsg_len = NLMSG_LENGTH(0),
1136 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1137 RDMA_NLDEV_CMD_GET),
1138 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1140 uint32_t sn = MLX5_NL_SN_GENERATE;
1143 ret = mlx5_nl_send(nl, &req, sn);
1146 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
/* Name match, device index and port index must all have been reported. */
1149 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1150 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1151 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1157 return data.portnum;
1161 * Analyze gathered port parameters via Netlink to recognize master
1162 * and representor devices for E-Switch configuration.
1164 * @param[in] num_vf_set
1165 * flag of presence of number of VFs port attribute.
1166 * @param[in, out] switch_info
1167 * Port information, including port name as a number and port name
1168 * type if recognized
1171 * master and representor flags are set in switch_info according to
1172 * recognized parameters (if any).
/*
 * Sets the master or representor flag of switch_info from the recognised
 * port name type. For unknown and unset names the master flag simply
 * mirrors num_vf_set; for legacy names the representor flag is its inverse.
 */
1175 mlx5_nl_check_switch_info(bool num_vf_set,
1176 struct mlx5_switch_info *switch_info)
1178 switch (switch_info->name_type) {
1179 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1181 * Name is not recognized, assume the master,
1182 * check the number of VFs key presence.
1184 switch_info->master = num_vf_set;
1186 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1188 * Name is not set, this assumes the legacy naming
1189 * schema for master, just check if there is a
1190 * number of VFs key.
1192 switch_info->master = num_vf_set;
1194 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1195 /* New uplink naming schema recognized. */
1196 switch_info->master = 1;
1198 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1199 /* Legacy representors naming schema. */
1200 switch_info->representor = !num_vf_set;
/* The three PF-based name types below share one handling. */
1202 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1204 case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1206 case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1207 /* New representors naming schema. */
1208 switch_info->representor = 1;
1214 * Process switch information from Netlink message.
1217 * Pointer to Netlink message header.
1219 * Opaque data pointer for this callback.
1222 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Callback for RTM_NEWLINK replies: extracts the physical port name and the
 * physical switch identifier from the link attributes, classifies the port
 * when a switch identifier was present, and copies the result to arg.
 */
1225 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1227 struct mlx5_switch_info info = {
1230 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
/* Attributes start after the Netlink header and the ifinfomsg payload. */
1234 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1235 bool switch_id_set = false;
1236 bool num_vf_set = false;
1239 if (nh->nlmsg_type != RTM_NEWLINK)
1241 while (off < nh->nlmsg_len) {
1242 struct rtattr *ra = (void *)((uintptr_t)nh + off);
1243 void *payload = RTA_DATA(ra);
/*
 * Reject an attribute that claims to extend past the end of the message.
 * NOTE(review): a zero rta_len passes this test and would not advance off
 * below - confirm the kernel never emits one.
 */
1246 if (ra->rta_len > nh->nlmsg_len - off)
1248 switch (ra->rta_type) {
1252 case IFLA_PHYS_PORT_NAME:
1253 len = RTA_PAYLOAD(ra);
1254 /* Some kernels do not pad attributes with zero. */
1255 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) {
1256 char name[MLX5_PHYS_PORT_NAME_MAX];
1259 * We can't just patch the message with padding
1260 * zero - it might corrupt the following items
1261 * in the message, we have to copy the string
1262 * by attribute length and pad the copied one.
1264 memcpy(name, payload, len);
1266 mlx5_translate_port_name(name, &info);
1269 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1272 case IFLA_PHYS_SWITCH_ID:
/* Fold the payload bytes into one integer, first byte most significant. */
1274 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1275 info.switch_id <<= 8;
1276 info.switch_id |= ((uint8_t *)payload)[i];
1278 switch_id_set = true;
/* Step to the next attribute, honouring attribute alignment. */
1281 off += RTA_ALIGN(ra->rta_len);
1283 if (switch_id_set) {
1284 /* We have some E-Switch configuration. */
1285 mlx5_nl_check_switch_info(num_vf_set, &info);
/* A port cannot be classified as both master and representor. */
1287 MLX5_ASSERT(!(info.master && info.representor));
/* arg is expected to point to a struct mlx5_switch_info. */
1288 memcpy(arg, &info, sizeof(info));
1296 * Get switch information associated with network interface.
1299 * Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1301 * Network interface index.
1303 * Switch information object, populated in case of success.
1306 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Sends an RTM_GETLINK request for one interface with an IFLA_EXT_MASK
 * attribute and lets mlx5_nl_switch_info_cb() fill the caller structure.
 */
1309 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1310 struct mlx5_switch_info *info)
1314 struct ifinfomsg info;
1319 .nlmsg_len = NLMSG_LENGTH
1321 RTA_LENGTH(sizeof(uint32_t))),
1322 .nlmsg_type = RTM_GETLINK,
1323 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1326 .ifi_family = AF_UNSPEC,
1327 .ifi_index = ifindex,
1330 .rta_type = IFLA_EXT_MASK,
1331 .rta_len = RTA_LENGTH(sizeof(int32_t)),
/*
 * Extension mask value 1; presumably the VF information filter so that the
 * reply includes the number-of-VFs attribute - confirm.
 * NOTE(review): the value is forced to little-endian while Netlink payloads
 * are in host byte order - confirm behaviour on big-endian hosts.
 */
1333 .extmask = RTE_LE32(1),
1335 uint32_t sn = MLX5_NL_SN_GENERATE;
1338 ret = mlx5_nl_send(nl, &req.nh, sn);
1340 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
/* Run-time counterpart of the assertion made inside the callback. */
1341 if (info->master && info->representor) {
1342 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1343 " and as representor", ifindex);
1351 * Delete VLAN network device by ifindex.
1354 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1355 * @param[in] ifindex
1356 * Interface index of network device to delete.
/*
 * Sends an RTM_DELLINK request for ifindex on the socket kept in the
 * context and waits for the acknowledgement; a warning is logged with the
 * interface index and the returned status.
 */
1359 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1362 uint32_t sn = MLX5_NL_SN_GENERATE;
1366 struct ifinfomsg info;
1369 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1370 .nlmsg_type = RTM_DELLINK,
1371 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1374 .ifi_family = AF_UNSPEC,
1375 .ifi_index = ifindex,
1380 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
/* No callback: only the acknowledgement or error status is consumed. */
1382 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1384 DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1385 " ifindex %u, %d", ifindex, ret);
1389 /* Set of subroutines to build Netlink message. */
/*
 * Returns the address just past the current, alignment-padded end of the
 * message, i.e. where the next attribute is to be written.
 */
1390 static struct nlattr *
1391 nl_msg_tail(struct nlmsghdr *nlh)
1393 return (struct nlattr *)
1394 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
/*
 * Appends one attribute of the given type with alen payload bytes at the
 * message tail and grows nlh->nlmsg_len by the aligned attribute size.
 * The caller must provide a buffer large enough for the grown message;
 * nothing here checks the capacity.
 */
1398 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
1400 struct nlattr *nla = nl_msg_tail(nlh);
1402 nla->nla_type = type;
/* nla_len counts header plus payload, without trailing padding. */
1403 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
1404 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len);
/*
 * The payload goes right after the attribute header.
 * NOTE(review): nl_attr_nest_start() calls this with data NULL and alen 0;
 * confirm the copy is skipped in that case.
 */
1407 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
/*
 * Opens a nested attribute: remembers the tail position, then appends an
 * empty attribute of the given type there. The remembered position is what
 * nl_attr_nest_end() needs to fix up the length later.
 */
1410 static struct nlattr *
1411 nl_attr_nest_start(struct nlmsghdr *nlh, int type)
1413 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
1415 nl_attr_put(nlh, type, NULL, 0);
/*
 * Closes a nested attribute: its length becomes the distance from its own
 * header to the current message tail, so it covers everything appended
 * since the matching nl_attr_nest_start().
 */
1420 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
1422 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
1426 * Create network VLAN device with specified VLAN tag.
1429 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1430 * @param[in] ifindex
1431 * Base network interface index.
1433 * VLAN tag for VLAN network device to create.
/*
 * Assembles an RTM_NEWLINK request on the stack that creates a VLAN device
 * on top of ifindex with the given tag, sends it, waits for the
 * acknowledgement and then resolves the new device name to an index.
 */
1436 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1437 uint32_t ifindex, uint16_t tag)
1439 struct nlmsghdr *nlh;
1440 struct ifinfomsg *ifm;
1441 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
/*
 * Worst-case size of the request: both headers, attribute headers and the
 * aligned payloads listed below, plus 16 spare bytes.
 */
1444 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1445 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1446 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1447 NLMSG_ALIGN(sizeof(uint32_t)) +
1448 NLMSG_ALIGN(sizeof(name)) +
1449 NLMSG_ALIGN(sizeof("vlan")) +
1450 NLMSG_ALIGN(sizeof(uint32_t)) +
1451 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1452 struct nlattr *na_info;
1453 struct nlattr *na_vlan;
1454 uint32_t sn = MLX5_NL_SN_GENERATE;
1457 memset(buf, 0, sizeof(buf));
1458 nlh = (struct nlmsghdr *)buf;
1459 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1460 nlh->nlmsg_type = RTM_NEWLINK;
1461 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1462 NLM_F_EXCL | NLM_F_ACK;
/* The ifinfomsg payload sits at the tail of the bare header. */
1463 ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1464 nlh->nlmsg_len += sizeof(struct ifinfomsg);
1465 ifm->ifi_family = AF_UNSPEC;
1468 ifm->ifi_flags = IFF_UP;
1469 ifm->ifi_change = 0xffffffff;
/* Parent interface of the new VLAN device. */
1470 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
/* Device name has the form prefix.ifindex.tag. */
1471 ret = snprintf(name, sizeof(name), "%s.%u.%u",
1472 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
/* ret + 1 includes the terminating zero in the attribute payload. */
1473 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
/* Nesting: IFLA_LINKINFO holds the kind and IFLA_INFO_DATA, which holds the VLAN id. */
1474 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1475 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1476 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1477 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1478 nl_attr_nest_end(nlh, na_vlan);
1479 nl_attr_nest_end(nlh, na_info);
/* Checked only after the attributes were written, and only in assert builds. */
1480 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1481 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1483 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1485 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1488 /* Try to get ifindex of created or pre-existing device. */
1489 ret = if_nametoindex(name);
1491 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1499 * Parse Netlink message to retrieve the general family ID.
1502 * Pointer to Netlink Message Header.
1504 * PMD data registered with this callback.
1507 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Callback for generic Netlink controller replies: scans the attributes
 * for CTRL_ATTR_FAMILY_ID and stores its 16-bit value through arg.
 */
1510 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
/* End of the message as declared by its own header. */
1513 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
/* First attribute: after the aligned Netlink and generic Netlink headers. */
1514 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1515 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
/*
 * NOTE(review): nla->nla_len is read before nla < tail is tested, so the
 * position at tail is dereferenced when the walk reaches the end of the
 * message - consider testing the bound first.
 */
1517 for (; nla->nla_len && nla < tail;
1518 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) {
1519 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
/* nla + 1 addresses the payload that follows the attribute header. */
1520 *(uint16_t *)arg = *(uint16_t *)(nla + 1);
1527 #define MLX5_NL_MAX_ATTR_SIZE 100
1529 * Get generic netlink family ID.
1531 * @param[in] nlsk_fd
1532 * Netlink socket file descriptor.
1537 * ID >= 0 on success, a negative errno value
1538 * otherwise and rte_errno is set.
/*
 * Asks the generic Netlink controller (GENL_ID_CTRL, CTRL_CMD_GETFAMILY)
 * for the numeric identifier of the family called name; the reply is parsed
 * by mlx5_nl_family_id_cb().
 */
1541 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1543 struct nlmsghdr *nlh;
1544 struct genlmsghdr *genl;
1545 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Payload length of the name attribute, terminating zero included. */
1546 int name_size = strlen(name) + 1;
/*
 * Request buffer with room for one attribute of at most
 * MLX5_NL_MAX_ATTR_SIZE payload bytes.
 * NOTE(review): no visible check that name_size fits that bound - confirm
 * callers only pass short family names.
 */
1549 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1550 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1551 NLMSG_ALIGN(sizeof(struct nlattr)) +
1552 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1554 memset(buf, 0, sizeof(buf));
1555 nlh = (struct nlmsghdr *)buf;
1556 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1557 nlh->nlmsg_type = GENL_ID_CTRL;
1558 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* The generic Netlink header sits at the tail of the bare header. */
1559 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1560 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1561 genl->cmd = CTRL_CMD_GETFAMILY;
1563 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1564 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1566 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1568 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1572 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1577 * Get Devlink family ID.
1579 * @param[in] nlsk_fd
1580 * Netlink socket file descriptor.
1583 * ID >= 0 on success and @p enable is updated, a negative errno value
1584 * otherwise and rte_errno is set.
1588 mlx5_nl_devlink_family_id_get(int nlsk_fd)
1590 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME);
1594 * Parse Netlink message to retrieve the ROCE enable status.
1597 * Pointer to Netlink Message Header.
1599 * PMD data register with this callback.
1602 * 0 on success, a negative errno value otherwise and rte_errno is set.
1605 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
1610 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
1611 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1612 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
1614 while (nla->nla_len && nla < tail) {
1615 switch (nla->nla_type) {
1616 /* Expected nested attributes case. */
1617 case DEVLINK_ATTR_PARAM:
1618 case DEVLINK_ATTR_PARAM_VALUES_LIST:
1619 case DEVLINK_ATTR_PARAM_VALUE:
1623 case DEVLINK_ATTR_PARAM_VALUE_DATA:
1627 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len));
1635 * Get ROCE enable status through Netlink.
1637 * @param[in] nlsk_fd
1638 * Netlink socket file descriptor.
1639 * @param[in] family_id
1640 * the Devlink family ID.
1642 * The device PCI address.
1643 * @param[out] enable
1644 * Where to store the enable status.
1647 * 0 on success and @p enable is updated, a negative errno value otherwise
1648 * and rte_errno is set.
1651 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1654 struct nlmsghdr *nlh;
1655 struct genlmsghdr *genl;
1656 uint32_t sn = MLX5_NL_SN_GENERATE;
1659 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1660 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1661 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1662 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1664 memset(buf, 0, sizeof(buf));
1665 nlh = (struct nlmsghdr *)buf;
1666 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1667 nlh->nlmsg_type = family_id;
1668 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1669 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1670 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1671 genl->cmd = DEVLINK_CMD_PARAM_GET;
1672 genl->version = DEVLINK_GENL_VERSION;
1673 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1674 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1675 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1676 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1678 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1680 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1685 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1686 cur_en ? "en" : "dis", pci_addr);
1691 * Reload mlx5 device kernel driver through Netlink.
1693 * @param[in] nlsk_fd
1694 * Netlink socket file descriptor.
1695 * @param[in] family_id
1696 * the Devlink family ID.
1698 * The device PCI address.
1699 * @param[out] enable
1700 * The enable status to set.
1703 * 0 on success, a negative errno value otherwise and rte_errno is set.
1706 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1708 struct nlmsghdr *nlh;
1709 struct genlmsghdr *genl;
1710 uint32_t sn = MLX5_NL_SN_GENERATE;
1712 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1713 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1714 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1715 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1717 memset(buf, 0, sizeof(buf));
1718 nlh = (struct nlmsghdr *)buf;
1719 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1720 nlh->nlmsg_type = family_id;
1721 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1722 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1723 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1724 genl->cmd = DEVLINK_CMD_RELOAD;
1725 genl->version = DEVLINK_GENL_VERSION;
1726 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1727 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1728 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1730 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1732 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1736 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1742 * Set ROCE enable status through Netlink.
1744 * @param[in] nlsk_fd
1745 * Netlink socket file descriptor.
1746 * @param[in] family_id
1747 * the Devlink family ID.
1749 * The device PCI address.
1750 * @param[out] enable
1751 * The enable status to set.
1754 * 0 on success, a negative errno value otherwise and rte_errno is set.
1757 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1760 struct nlmsghdr *nlh;
1761 struct genlmsghdr *genl;
1762 uint32_t sn = MLX5_NL_SN_GENERATE;
1764 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1765 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1766 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1767 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
1768 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1769 uint8_t ptype = NLA_FLAG;
1772 memset(buf, 0, sizeof(buf));
1773 nlh = (struct nlmsghdr *)buf;
1774 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1775 nlh->nlmsg_type = family_id;
1776 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1777 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1778 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1779 genl->cmd = DEVLINK_CMD_PARAM_SET;
1780 genl->version = DEVLINK_GENL_VERSION;
1781 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1782 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1783 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1784 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1785 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
1787 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1788 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1790 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1792 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1793 " %d.", enable ? "en" : "dis", pci_addr, ret);
1796 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1797 pci_addr, enable ? "en" : "dis");
1798 /* Now, need to reload the driver. */
1799 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);