1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
11 #include <rdma/rdma_netlink.h>
17 #include <sys/socket.h>
20 #include <rte_errno.h>
21 #include <rte_atomic.h>
24 #include "mlx5_common_utils.h"
25 #include "mlx5_malloc.h"
27 #include <linux/devlink.h>
31 /* Size of the buffer to receive kernel messages */
32 #define MLX5_NL_BUF_SIZE (32 * 1024)
33 /* Send buffer size for the Netlink socket */
34 #define MLX5_SEND_BUF_SIZE 32768
35 /* Receive buffer size for the Netlink socket */
36 #define MLX5_RECV_BUF_SIZE 32768
38 /** Parameters of VLAN devices created by driver. */
39 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
41 * Define NDA_RTA as defined in iproute2 sources.
43 * see in iproute2 sources file include/libnetlink.h
46 #define MLX5_NDA_RTA(r) \
47 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
50 * Define NLMSG_TAIL as defined in iproute2 sources.
52 * see in iproute2 sources file include/libnetlink.h
55 #define NLMSG_TAIL(nmsg) \
56 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
59 * The following definitions are normally found in rdma/rdma_netlink.h,
60 * however they are so recent that most systems do not expose them yet.
62 #ifndef HAVE_RDMA_NL_NLDEV
63 #define RDMA_NL_NLDEV 5
65 #ifndef HAVE_RDMA_NLDEV_CMD_GET
66 #define RDMA_NLDEV_CMD_GET 1
68 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
69 #define RDMA_NLDEV_CMD_PORT_GET 5
71 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
72 #define RDMA_NLDEV_ATTR_DEV_INDEX 1
74 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
75 #define RDMA_NLDEV_ATTR_DEV_NAME 2
77 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
78 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
80 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
81 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
84 /* These are normally found in linux/if_link.h. */
85 #ifndef HAVE_IFLA_NUM_VF
86 #define IFLA_NUM_VF 21
88 #ifndef HAVE_IFLA_EXT_MASK
89 #define IFLA_EXT_MASK 29
91 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
92 #define IFLA_PHYS_SWITCH_ID 36
94 #ifndef HAVE_IFLA_PHYS_PORT_NAME
95 #define IFLA_PHYS_PORT_NAME 38
99 * Some Devlink defines may be missing in old kernel versions,
100 * adjust used defines.
102 #ifndef DEVLINK_GENL_NAME
103 #define DEVLINK_GENL_NAME "devlink"
105 #ifndef DEVLINK_GENL_VERSION
106 #define DEVLINK_GENL_VERSION 1
108 #ifndef DEVLINK_ATTR_BUS_NAME
109 #define DEVLINK_ATTR_BUS_NAME 1
111 #ifndef DEVLINK_ATTR_DEV_NAME
112 #define DEVLINK_ATTR_DEV_NAME 2
114 #ifndef DEVLINK_ATTR_PARAM
115 #define DEVLINK_ATTR_PARAM 80
117 #ifndef DEVLINK_ATTR_PARAM_NAME
118 #define DEVLINK_ATTR_PARAM_NAME 81
120 #ifndef DEVLINK_ATTR_PARAM_TYPE
121 #define DEVLINK_ATTR_PARAM_TYPE 83
123 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
124 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
126 #ifndef DEVLINK_ATTR_PARAM_VALUE
127 #define DEVLINK_ATTR_PARAM_VALUE 85
129 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
130 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
132 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
133 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
135 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
136 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
138 #ifndef DEVLINK_CMD_RELOAD
139 #define DEVLINK_CMD_RELOAD 37
141 #ifndef DEVLINK_CMD_PARAM_GET
142 #define DEVLINK_CMD_PARAM_GET 38
144 #ifndef DEVLINK_CMD_PARAM_SET
145 #define DEVLINK_CMD_PARAM_SET 39
151 /* Add/remove MAC address through Netlink */
152 struct mlx5_nl_mac_addr {
153 struct rte_ether_addr (*mac)[];
154 /**< MAC address handled by the device. */
155 int mac_n; /**< Number of addresses in the array. */
158 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
159 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
160 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
161 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
163 /** Data structure used by mlx5_nl_cmdget_cb(). */
164 struct mlx5_nl_ifindex_data {
165 const char *name; /**< IB device name (in). */
166 uint32_t flags; /**< found attribute flags (out). */
167 uint32_t ibindex; /**< IB device index (out). */
168 uint32_t ifindex; /**< Network interface index (out). */
169 uint32_t portnum; /**< IB device max port number (out). */
/*
 * Counter that backs Netlink sequence numbers, initialised to 0.
 * NOTE(review): it is defined without `static`, so the symbol has external
 * linkage; confirm whether anything outside this file relies on that.
 */
172 rte_atomic32_t atomic_sn = RTE_ATOMIC32_INIT(0);
174 /* Generate Netlink sequence number. */
/* Every expansion adds 1 to atomic_sn and yields the new value as uint32_t. */
175 #define MLX5_NL_SN_GENERATE ((uint32_t)rte_atomic32_add_return(&atomic_sn, 1))
178 * Opens a Netlink socket.
181 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
184 * A file descriptor on success, a negative errno value otherwise and
/*
 * Open and configure a Netlink socket for the requested protocol.
 * Visible steps: create the socket, size its send and receive buffers,
 * then bind it to a local AF_NETLINK address.
 * NOTE(review): the error handling between these calls is not visible in
 * this excerpt.
 */
188 mlx5_nl_init(int protocol)
/* Requested kernel buffer sizes, taken from the file-level defines. */
191 int sndbuf_size = MLX5_SEND_BUF_SIZE;
192 int rcvbuf_size = MLX5_RECV_BUF_SIZE;
193 struct sockaddr_nl local = {
194 .nl_family = AF_NETLINK,
/* SOCK_CLOEXEC: the descriptor is closed automatically across exec(). */
198 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
203 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int));
208 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int));
213 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
225 * Send a request message to the kernel on the Netlink socket.
228 * Netlink socket file descriptor.
230 * The Netlink message sent to the kernel.
234 * Pointer to the request structure.
236 * Length of the request in bytes.
239 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send a Netlink header plus a separately stored request payload to the
 * kernel with a single sendmsg() call.
 */
243 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
246 struct sockaddr_nl sa = {
247 .nl_family = AF_NETLINK,
/* Gather list: the header first, then `len` bytes taken from `req`. */
249 struct iovec iov[2] = {
250 { .iov_base = nh, .iov_len = sizeof(*nh), },
251 { .iov_base = req, .iov_len = len, },
253 struct msghdr msg = {
255 .msg_namelen = sizeof(sa),
261 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
263 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result is the failure case handled below. */
264 if (send_bytes < 0) {
272 * Send a message to the kernel on the Netlink socket.
275 * The Netlink socket file descriptor used for communication.
277 * The Netlink message sent to the kernel.
282 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send one already assembled Netlink message (header and payload contiguous
 * in memory) to the kernel.
 */
286 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
288 struct sockaddr_nl sa = {
289 .nl_family = AF_NETLINK,
/* The transmitted length is whatever the header itself declares. */
293 .iov_len = nh->nlmsg_len,
295 struct msghdr msg = {
297 .msg_namelen = sizeof(sa),
303 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
305 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result is the failure case handled below. */
306 if (send_bytes < 0) {
314 * Receive a message from the kernel on the Netlink socket, following
318 * The Netlink socket file descriptor used for communication.
322 * The callback function to call for each Netlink message received.
323 * @param[in, out] arg
324 * Custom arguments for the callback.
327 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Receive the kernel's answer to the request identified by `sn` and walk
 * every Netlink message contained in it.
 */
330 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
333 struct sockaddr_nl sa;
/*
 * The receive buffer is heap allocated and sized with MLX5_RECV_BUF_SIZE.
 * NOTE(review): the NULL check and the matching free are not visible in
 * this excerpt; confirm every exit path releases `buf`.
 */
334 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY);
337 .iov_len = MLX5_RECV_BUF_SIZE,
339 struct msghdr msg = {
341 .msg_namelen = sizeof(sa),
343 /* One message at a time */
358 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
359 if (recv_bytes == -1) {
364 nh = (struct nlmsghdr *)buf;
/* Keep reading until the first header in the buffer carries our sn. */
365 } while (nh->nlmsg_seq != sn);
/* Iterate over each message held in the received datagram. */
367 NLMSG_OK(nh, (unsigned int)recv_bytes);
368 nh = NLMSG_NEXT(nh, recv_bytes)) {
369 if (nh->nlmsg_type == NLMSG_ERROR) {
370 struct nlmsgerr *err_data = NLMSG_DATA(nh);
/* A negative error code is stored in rte_errno as a positive value. */
372 if (err_data->error < 0) {
373 rte_errno = -err_data->error;
381 /* Multi-part msgs and their trailing DONE message. */
382 if (nh->nlmsg_flags & NLM_F_MULTI) {
383 if (nh->nlmsg_type == NLMSG_DONE) {
402 * Parse Netlink message to retrieve the bridge MAC address.
405 * Pointer to Netlink Message Header.
407 * PMD data registered with this callback.
410 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback: scan the route attributes of one neighbour
 * message and append every NDA_LLADDR payload to the caller's MAC table.
 */
413 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
415 struct mlx5_nl_mac_addr *data = arg;
416 struct ndmsg *r = NLMSG_DATA(nh);
417 struct rtattr *attribute;
/* Bytes of attribute data that follow the fixed ndmsg header. */
420 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
421 for (attribute = MLX5_NDA_RTA(r);
422 RTA_OK(attribute, len);
423 attribute = RTA_NEXT(attribute, len)) {
424 if (attribute->rta_type == NDA_LLADDR) {
/* Guard against writing past MLX5_MAX_MAC_ADDRESSES table entries. */
425 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
427 "not enough room to finalize the"
432 #ifdef RTE_LIBRTE_MLX5_DEBUG
433 char m[RTE_ETHER_ADDR_FMT_SIZE];
435 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
436 RTA_DATA(attribute));
437 DRV_LOG(DEBUG, "bridge MAC address %s", m);
/*
 * NOTE(review): RTE_ETHER_ADDR_LEN bytes are copied from the attribute
 * payload; no check that the payload is at least that long is visible in
 * this excerpt.
 */
439 memcpy(&(*data->mac)[data->mac_n++],
440 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
447 * Get bridge MAC addresses.
450 * Netlink socket file descriptor.
451 * @param[in] iface_idx
452 * Net device interface index.
454 * Pointer to the array table of MAC addresses to fill.
455 * Its size should be MLX5_MAX_MAC_ADDRESSES.
457 * Number of entries filled in MAC array.
460 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Dump the bridge neighbour entries of one interface and collect their
 * link-layer addresses through mlx5_nl_mac_addr_cb().
 */
463 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
464 struct rte_ether_addr (*mac)[], int *mac_n)
468 struct ifinfomsg ifm;
471 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
/* RTM_GETNEIGH with NLM_F_DUMP asks for the whole neighbour table. */
472 .nlmsg_type = RTM_GETNEIGH,
473 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
476 .ifi_family = PF_BRIDGE,
477 .ifi_index = iface_idx,
480 struct mlx5_nl_mac_addr data = {
484 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Header and ifinfomsg payload are handed over as two separate pieces. */
489 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
490 sizeof(struct ifinfomsg));
493 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
499 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
500 iface_idx, strerror(rte_errno));
505 * Modify the MAC address neighbour table with Netlink.
508 * Netlink socket file descriptor.
509 * @param[in] iface_idx
510 * Net device interface index.
512 * MAC address to consider.
514 * 1 to add the MAC address, 0 to remove the MAC address.
517 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Add or delete one MAC address in the bridge neighbour table of an
 * interface, then wait for the kernel acknowledgement.
 */
520 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
521 struct rte_ether_addr *mac, int add)
/* Storage that receives the NDA_LLADDR attribute payload. */
527 uint8_t buffer[RTE_ETHER_ADDR_LEN];
530 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
531 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
532 NLM_F_EXCL | NLM_F_ACK,
/* Nonzero `add` creates the neighbour entry, zero deletes it. */
533 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
536 .ndm_family = PF_BRIDGE,
537 .ndm_state = NUD_NOARP | NUD_PERMANENT,
538 .ndm_ifindex = iface_idx,
539 .ndm_flags = NTF_SELF,
542 .rta_type = NDA_LLADDR,
543 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
546 uint32_t sn = MLX5_NL_SN_GENERATE;
551 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
/* Extend the total message length to include the aligned attribute. */
552 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
553 RTA_ALIGN(req.rta.rta_len);
554 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
/* No callback: only the acknowledgement / error status is consumed. */
557 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
562 #ifdef RTE_LIBRTE_MLX5_DEBUG
564 char m[RTE_ETHER_ADDR_FMT_SIZE];
566 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
568 "Interface %u cannot %s MAC address %s %s",
570 add ? "add" : "remove", m, strerror(rte_errno));
577 * Modify the VF MAC address neighbour table with Netlink.
580 * Netlink socket file descriptor.
581 * @param[in] iface_idx
582 * Net device interface index.
584 * MAC address to consider.
589 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Set the MAC address of a VF by sending a link message that nests
 * IFLA_VF_MAC inside IFLA_VF_INFO inside IFLA_VFINFO_LIST.
 */
592 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
593 struct rte_ether_addr *mac, int vf_index)
598 struct ifinfomsg ifm;
599 struct rtattr vf_list_rta;
600 struct rtattr vf_info_rta;
601 struct rtattr vf_mac_rta;
602 struct ifla_vf_mac ivm;
605 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
606 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
/*
 * NOTE(review): RTM_BASE is used as the message type; presumably it shares
 * its numeric value with RTM_NEWLINK in linux/rtnetlink.h - confirm and
 * consider spelling the intended command explicitly.
 */
607 .nlmsg_type = RTM_BASE,
610 .ifi_index = iface_idx,
613 .rta_type = IFLA_VFINFO_LIST,
614 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
617 .rta_type = IFLA_VF_INFO,
618 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
621 .rta_type = IFLA_VF_MAC,
624 struct ifla_vf_mac ivm = {
627 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Fill the local ivm, then copy it into the payload of vf_mac_rta. */
629 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
630 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
632 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
/* Total length = header part plus the three aligned attributes. */
633 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
634 RTA_ALIGN(req.vf_list_rta.rta_len) +
635 RTA_ALIGN(req.vf_info_rta.rta_len) +
636 RTA_ALIGN(req.vf_mac_rta.rta_len);
/* Lengths of both enclosing attributes are measured from the message tail. */
637 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
639 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
644 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
647 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
653 "representor %u cannot set VF MAC address "
654 "%02X:%02X:%02X:%02X:%02X:%02X : %s",
656 mac->addr_bytes[0], mac->addr_bytes[1],
657 mac->addr_bytes[2], mac->addr_bytes[3],
658 mac->addr_bytes[4], mac->addr_bytes[5],
659 strerror(rte_errno));
667 * Netlink socket file descriptor.
668 * @param[in] iface_idx
669 * Net device interface index.
671 * BITFIELD_DECLARE array to store the mac.
673 * MAC address to register.
678 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Register a MAC address with the kernel and record ownership of its slot
 * in the mac_own bitfield.
 */
681 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
682 uint64_t *mac_own, struct rte_ether_addr *mac,
687 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
/* The index is asserted and also range-checked at run time. */
689 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
690 if (index >= MLX5_MAX_MAC_ADDRESSES)
693 BITFIELD_SET(mac_own, index);
701 * Remove a MAC address.
704 * Netlink socket file descriptor.
705 * @param[in] iface_idx
706 * Net device interface index.
708 * BITFIELD_DECLARE array to store the mac.
710 * MAC address to remove.
715 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Drop ownership of a MAC slot and ask the kernel to remove the address.
 */
718 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
719 struct rte_ether_addr *mac, uint32_t index)
721 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
722 if (index >= MLX5_MAX_MAC_ADDRESSES)
/*
 * The ownership bit is cleared before the Netlink call, so it stays cleared
 * even when the kernel removal below reports an error.
 */
725 BITFIELD_RESET(mac_own, index);
726 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
730 * Synchronize Netlink bridge table to the internal table.
733 * Netlink socket file descriptor.
734 * @param[in] iface_idx
735 * Net device interface index.
737 * Mac addresses array to sync.
739 * @p mac_addrs array size.
/*
 * Merge the kernel's bridge MAC list into mac_addrs: every listed address
 * that is not yet present goes into the first all-zero entry.
 */
742 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
743 struct rte_ether_addr *mac_addrs, int n)
/*
 * NOTE(review): this is a stack VLA of `n` entries, while the listing
 * callback stops at MLX5_MAX_MAC_ADDRESSES entries; if a caller passes a
 * smaller `n` the callback could write past macs[] - confirm callers.
 */
745 struct rte_ether_addr macs[n];
750 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
753 for (i = 0; i != macs_n; ++i) {
756 /* Verify the address is not in the array yet. */
757 for (j = 0; j != n; ++j)
758 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
762 /* Find the first entry available. */
763 for (j = 0; j != n; ++j) {
764 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
765 mac_addrs[j] = macs[i];
773 * Flush all added MAC addresses.
776 * Netlink socket file descriptor.
777 * @param[in] iface_idx
778 * Net device interface index.
779 * @param[in] mac_addrs
780 * Mac addresses array to flush.
782 * @p mac_addrs array size.
784 * BITFIELD_DECLARE array to store the mac.
/*
 * Remove from the kernel every MAC address whose slot is marked as owned
 * in mac_own.
 */
787 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
788 struct rte_ether_addr *mac_addrs, int n,
/* Sizes outside 1..MLX5_MAX_MAC_ADDRESSES take the guarded branch. */
793 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
/* Walk the table from the last entry down to the first. */
796 for (i = n - 1; i >= 0; --i) {
797 struct rte_ether_addr *m = &mac_addrs[i];
799 if (BITFIELD_ISSET(mac_own, i))
800 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
806 * Enable promiscuous / all multicast mode through Netlink.
809 * Netlink socket file descriptor.
810 * @param[in] iface_idx
811 * Net device interface index.
813 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
815 * Nonzero to enable, disable otherwise.
818 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Send an RTM_NEWLINK message that sets or clears the requested interface
 * flags on iface_idx.
 */
821 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
826 struct ifinfomsg ifi;
829 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
830 .nlmsg_type = RTM_NEWLINK,
/*
 * NOTE(review): NLM_F_ACK is not requested and no mlx5_nl_recv() call is
 * visible in this excerpt, so the kernel's verdict appears not to be read.
 */
831 .nlmsg_flags = NLM_F_REQUEST,
/* Enabling passes the flag bits as values; disabling passes zero. */
834 .ifi_flags = enable ? flags : 0,
836 .ifi_index = iface_idx,
839 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Only IFF_PROMISC and IFF_ALLMULTI are accepted by this helper. */
842 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
845 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
852 * Enable promiscuous mode through Netlink.
855 * Netlink socket file descriptor.
856 * @param[in] iface_idx
857 * Net device interface index.
859 * Nonzero to enable, disable otherwise.
862 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Wrapper around mlx5_nl_device_flags() for IFF_PROMISC; the log message
 * below reports rte_errno when the request cannot be applied.
 */
865 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
867 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
871 "Interface %u cannot %s promisc mode: Netlink error %s",
872 iface_idx, enable ? "enable" : "disable",
873 strerror(rte_errno));
878 * Enable all multicast mode through Netlink.
881 * Netlink socket file descriptor.
882 * @param[in] iface_idx
883 * Net device interface index.
885 * Nonzero to enable, disable otherwise.
888 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Wrapper around mlx5_nl_device_flags() for IFF_ALLMULTI; the log message
 * below reports rte_errno when the request cannot be applied.
 */
891 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
893 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
898 "Interface %u cannot %s allmulti : Netlink error %s",
899 iface_idx, enable ? "enable" : "disable",
900 strerror(rte_errno));
905 * Process network interface information from Netlink message.
908 * Pointer to Netlink message header.
910 * Opaque data pointer for this callback.
913 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback for RDMA nldev GET / PORT_GET answers: gather the
 * attributes of one message into a local record and publish it to the
 * caller's context only when the device name matches data->name.
 */
916 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
918 struct mlx5_nl_ifindex_data *data = arg;
919 struct mlx5_nl_ifindex_data local = {
/* Attribute parsing starts right after the Netlink header. */
922 size_t off = NLMSG_HDRLEN;
924 if (nh->nlmsg_type !=
925 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
927 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
929 while (off < nh->nlmsg_len) {
930 struct nlattr *na = (void *)((uintptr_t)nh + off);
931 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
/*
 * Reject attributes that claim more bytes than remain in the message.
 * NOTE(review): a zero nla_len passes this test and would leave `off`
 * unchanged at the bottom of the loop; no separate guard is visible here.
 */
933 if (na->nla_len > nh->nlmsg_len - off)
935 switch (na->nla_type) {
936 case RDMA_NLDEV_ATTR_DEV_INDEX:
937 local.ibindex = *(uint32_t *)payload;
938 local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
940 case RDMA_NLDEV_ATTR_DEV_NAME:
/* The payload is compared as a NUL-terminated string. */
941 if (!strcmp(payload, data->name))
942 local.flags |= MLX5_NL_CMD_GET_IB_NAME;
944 case RDMA_NLDEV_ATTR_NDEV_INDEX:
945 local.ifindex = *(uint32_t *)payload;
946 local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
948 case RDMA_NLDEV_ATTR_PORT_INDEX:
949 local.portnum = *(uint32_t *)payload;
950 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
/* Step to the next attribute using the aligned length. */
955 off += NLA_ALIGN(na->nla_len);
958 * It is possible to have multiple messages for all
959 * Infiniband devices in the system with appropriate name.
960 * So we should gather parameters locally and copy to
961 * query context only in case of coinciding device name.
963 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
964 data->flags = local.flags;
965 data->ibindex = local.ibindex;
966 data->ifindex = local.ifindex;
967 data->portnum = local.portnum;
976 * Get index of network interface associated with some IB device.
978 * This is the only somewhat safe method to avoid resorting to heuristics
979 * when faced with port representors. Unfortunately it requires at least
983 * Netlink socket of the RDMA kind (NETLINK_RDMA).
987 * IB device port index, starting from 1
989 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
/*
 * Resolve the network interface index behind port `pindex` of the IB
 * device called `name`, using two RDMA Netlink queries.
 */
993 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
995 struct mlx5_nl_ifindex_data data = {
998 .ibindex = 0, /* Determined during first pass. */
999 .ifindex = 0, /* Determined during second pass. */
/* Room for the header plus two 32-bit attributes used by the second pass. */
1003 uint8_t buf[NLMSG_HDRLEN +
1004 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
1005 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1008 .nlmsg_len = NLMSG_LENGTH(0),
1009 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1010 RDMA_NLDEV_CMD_GET),
1011 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1015 uint32_t sn = MLX5_NL_SN_GENERATE;
/* First pass: dump all devices; the callback matches on the name. */
1018 ret = mlx5_nl_send(nl, &req.nh, sn);
1021 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1024 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1025 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
/* Second pass: reuse the request as a PORT_GET with a fresh sequence number. */
1028 sn = MLX5_NL_SN_GENERATE;
1029 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1030 RDMA_NLDEV_CMD_PORT_GET);
1031 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1032 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
/* First attribute: the IB device index found by the first pass. */
1033 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1034 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
1035 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1036 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1037 &data.ibindex, sizeof(data.ibindex));
/* Second attribute: the port index supplied by the caller. */
1038 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1039 na->nla_len = NLA_HDRLEN + sizeof(pindex);
1040 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1041 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1042 &pindex, sizeof(pindex));
1043 ret = mlx5_nl_send(nl, &req.nh, sn);
1046 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1049 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1050 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1051 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1054 return data.ifindex;
1061 * Get the number of physical ports of given IB device.
1064 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1069 * A valid (nonzero) number of ports on success, 0 otherwise
1070 * and rte_errno is set.
/*
 * Query the port count of the IB device called `name` with one RDMA nldev
 * GET dump.
 */
1073 mlx5_nl_portnum(int nl, const char *name)
1075 struct mlx5_nl_ifindex_data data = {
/* The request is a bare Netlink header with no payload. */
1081 struct nlmsghdr req = {
1082 .nlmsg_len = NLMSG_LENGTH(0),
1083 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1084 RDMA_NLDEV_CMD_GET),
1085 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1087 uint32_t sn = MLX5_NL_SN_GENERATE;
1090 ret = mlx5_nl_send(nl, &req, sn);
1093 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
/* Name, device index and port index must all have been reported. */
1096 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1097 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1098 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1104 return data.portnum;
1108 * Analyze gathered port parameters via Netlink to recognize master
1109 * and representor devices for E-Switch configuration.
1111 * @param[in] num_vf_set
1112 * Whether the number-of-VFs port attribute is present.
1113 * @param[inout] switch_info
1114 * Port information, including port name as a number and port name
1115 * type if recognized
1118 * master and representor flags are set in switch_info according to
1119 * recognized parameters (if any).
/*
 * Derive the master / representor flags of switch_info from the recognised
 * port-name type and from whether a number-of-VFs attribute was seen.
 */
1122 mlx5_nl_check_switch_info(bool num_vf_set,
1123 struct mlx5_switch_info *switch_info)
1125 switch (switch_info->name_type) {
1126 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1128 * Name is not recognized, assume the master,
1129 * check the number of VFs key presence.
1131 switch_info->master = num_vf_set;
1133 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1135 * Name is not set, this assumes the legacy naming
1136 * schema for master, just check if there is a
1137 * number of VFs key.
1139 switch_info->master = num_vf_set;
1141 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1142 /* New uplink naming schema recognized. */
1143 switch_info->master = 1;
1145 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1146 /* Legacy representors naming schema. */
1147 switch_info->representor = !num_vf_set;
/* NOTE(review): PFHPF appears to share the PFVF handling below - confirm. */
1149 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1151 case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1152 /* New representors naming schema. */
1153 switch_info->representor = 1;
1159 * Process switch information from Netlink message.
1162 * Pointer to Netlink message header.
1164 * Opaque data pointer for this callback.
1167 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback for RTM_NEWLINK answers: extract the physical
 * port name and switch ID, classify the port, and copy the result to `arg`.
 */
1170 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1172 struct mlx5_switch_info info = {
1175 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
/* Attributes start after the Netlink header and the ifinfomsg body. */
1179 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1180 bool switch_id_set = false;
/* NOTE(review): the case that sets num_vf_set is not visible in this excerpt. */
1181 bool num_vf_set = false;
1183 if (nh->nlmsg_type != RTM_NEWLINK)
1185 while (off < nh->nlmsg_len) {
1186 struct rtattr *ra = (void *)((uintptr_t)nh + off);
1187 void *payload = RTA_DATA(ra);
/*
 * Reject attributes longer than the remaining message.
 * NOTE(review): a zero rta_len passes this test and would not advance
 * `off`; no separate guard is visible here.
 */
1190 if (ra->rta_len > nh->nlmsg_len - off)
1192 switch (ra->rta_type) {
1196 case IFLA_PHYS_PORT_NAME:
1197 mlx5_translate_port_name((char *)payload, &info);
1199 case IFLA_PHYS_SWITCH_ID:
/* Fold the payload bytes into switch_id, first byte most significant. */
1201 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1202 info.switch_id <<= 8;
1203 info.switch_id |= ((uint8_t *)payload)[i];
1205 switch_id_set = true;
1208 off += RTA_ALIGN(ra->rta_len);
1210 if (switch_id_set) {
1211 /* We have some E-Switch configuration. */
1212 mlx5_nl_check_switch_info(num_vf_set, &info);
1214 MLX5_ASSERT(!(info.master && info.representor));
/* `arg` receives a full copy of the local struct mlx5_switch_info. */
1215 memcpy(arg, &info, sizeof(info));
1223 * Get switch information associated with network interface.
1226 * Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1228 * Network interface index.
1230 * Switch information object, populated in case of success.
1233 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Ask the kernel for the link description of `ifindex` and let
 * mlx5_nl_switch_info_cb() fill *info from the answer.
 */
1236 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1237 struct mlx5_switch_info *info)
1241 struct ifinfomsg info;
1246 .nlmsg_len = NLMSG_LENGTH
1248 RTA_LENGTH(sizeof(uint32_t))),
1249 .nlmsg_type = RTM_GETLINK,
1250 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1253 .ifi_family = AF_UNSPEC,
1254 .ifi_index = ifindex,
/* A single IFLA_EXT_MASK attribute carrying the 32-bit value below. */
1257 .rta_type = IFLA_EXT_MASK,
1258 .rta_len = RTA_LENGTH(sizeof(int32_t)),
1260 .extmask = RTE_LE32(1),
1262 uint32_t sn = MLX5_NL_SN_GENERATE;
1265 ret = mlx5_nl_send(nl, &req.nh, sn);
1267 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
/* A port reported as both master and representor is logged as an error. */
1268 if (info->master && info->representor) {
1269 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1270 " and as representor", ifindex);
1278 * Delete VLAN network device by ifindex.
1281 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1282 * @param[in] ifindex
1283 * Interface index of network device to delete.
/*
 * Delete the network device `ifindex` with RTM_DELLINK on the context's
 * Netlink socket; a warning is logged when the operation fails.
 */
1286 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1289 uint32_t sn = MLX5_NL_SN_GENERATE;
1293 struct ifinfomsg info;
1296 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1297 .nlmsg_type = RTM_DELLINK,
1298 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1301 .ifi_family = AF_UNSPEC,
1302 .ifi_index = ifindex,
1307 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
/* No callback: only the acknowledgement / error status is consumed. */
1309 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1311 DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1312 " ifindex %u, %d", ifindex, ret);
1316 /* Set of subroutines to build Netlink message. */
/*
 * Return the address just past the current (aligned) end of the message,
 * i.e. where the next attribute would be appended.
 */
1317 static struct nlattr *
1318 nl_msg_tail(struct nlmsghdr *nlh)
1320 return (struct nlattr *)
1321 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
/*
 * Append one attribute of `type` with `alen` payload bytes at the message
 * tail and grow nlmsg_len accordingly.
 * NOTE(review): no capacity check is visible here; callers appear to be
 * responsible for sizing the buffer behind `nlh`.
 */
1325 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
1327 struct nlattr *nla = nl_msg_tail(nlh);
1329 nla->nla_type = type;
/* Attribute length = aligned attribute header + unpadded payload. */
1330 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
/* The message grows by the padded attribute size. */
1331 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len);
1334 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
/*
 * Open a nested attribute: remember where it starts, then append an empty
 * attribute of `type` as its header.
 */
1337 static struct nlattr *
1338 nl_attr_nest_start(struct nlmsghdr *nlh, int type)
/* Captured before nl_attr_put() moves the tail forward. */
1340 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
1342 nl_attr_put(nlh, type, NULL, 0);
/*
 * Close a nested attribute by setting its length to the distance from its
 * header to the current message tail.
 */
1347 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
1349 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
1353 * Create network VLAN device with specified VLAN tag.
1356 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1357 * @param[in] ifindex
1358 * Base network interface index.
1360 * VLAN tag for VLAN network device to create.
/*
 * Create a VLAN network device on top of `ifindex` carrying VLAN `tag`,
 * then look up the interface index of the resulting device by name.
 */
1363 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1364 uint32_t ifindex, uint16_t tag)
1366 struct nlmsghdr *nlh;
1367 struct ifinfomsg *ifm;
1368 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
/* Stack buffer sized for the headers, the attributes below and 16 spare bytes. */
1371 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1372 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1373 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1374 NLMSG_ALIGN(sizeof(uint32_t)) +
1375 NLMSG_ALIGN(sizeof(name)) +
1376 NLMSG_ALIGN(sizeof("vlan")) +
1377 NLMSG_ALIGN(sizeof(uint32_t)) +
1378 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1379 struct nlattr *na_info;
1380 struct nlattr *na_vlan;
1381 uint32_t sn = MLX5_NL_SN_GENERATE;
1384 memset(buf, 0, sizeof(buf));
1385 nlh = (struct nlmsghdr *)buf;
1386 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1387 nlh->nlmsg_type = RTM_NEWLINK;
1388 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1389 NLM_F_EXCL | NLM_F_ACK;
/* The ifinfomsg body sits directly after the Netlink header. */
1390 ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1391 nlh->nlmsg_len += sizeof(struct ifinfomsg);
1392 ifm->ifi_family = AF_UNSPEC;
1395 ifm->ifi_flags = IFF_UP;
1396 ifm->ifi_change = 0xffffffff;
1397 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
/*
 * Device name is "<prefix>.<ifindex>.<tag>".
 * NOTE(review): the snprintf() result is used as the attribute length
 * (ret + 1) without a visible check for truncation or a negative value.
 */
1398 ret = snprintf(name, sizeof(name), "%s.%u.%u",
1399 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
1400 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
/* Nested layout: IFLA_LINKINFO { IFLA_INFO_KIND, IFLA_INFO_DATA { IFLA_VLAN_ID } }. */
1401 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1402 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1403 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1404 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1405 nl_attr_nest_end(nlh, na_vlan);
1406 nl_attr_nest_end(nlh, na_info);
/* Checked only after the message has been built, and only via assertion. */
1407 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1408 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1410 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1412 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1415 /* Try to get ifindex of created or pre-existing device. */
1416 ret = if_nametoindex(name);
1418 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1426 * Parse Netlink message to retrieve the general family ID.
1429 * Pointer to Netlink Message Header.
1431 * PMD data registered with this callback.
1434 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback: scan the attributes of a generic Netlink
 * control answer and store the 16-bit CTRL_ATTR_FAMILY_ID value in `arg`.
 */
1437 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
1440 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
/* The first attribute follows the Netlink and generic Netlink headers. */
1441 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1442 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
/*
 * NOTE(review): nla->nla_len is read before `nla < tail` is tested, so the
 * final iteration inspects memory at or beyond the message end; swapping
 * the two operands would avoid that.
 */
1444 for (; nla->nla_len && nla < tail;
1445 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) {
1446 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
/* The value is read from the bytes right after the attribute header. */
1447 *(uint16_t *)arg = *(uint16_t *)(nla + 1);
1454 #define MLX5_NL_MAX_ATTR_SIZE 100
1456 * Get generic netlink family ID.
1458 * @param[in] nlsk_fd
1459 * Netlink socket file descriptor.
1464 * ID >= 0 on success, a negative errno value
1465 * otherwise and rte_errno is set.
/*
 * Resolve the numeric ID of the generic Netlink family called `name` by
 * sending CTRL_CMD_GETFAMILY to the GENL_ID_CTRL controller.
 */
1468 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1470 struct nlmsghdr *nlh;
1471 struct genlmsghdr *genl;
1472 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Attribute payload length includes the terminating NUL. */
1473 int name_size = strlen(name) + 1;
/*
 * The buffer reserves MLX5_NL_MAX_ATTR_SIZE bytes for the name attribute.
 * NOTE(review): no check of name_size against that limit is visible in
 * this excerpt.
 */
1476 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1477 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1478 NLMSG_ALIGN(sizeof(struct nlattr)) +
1479 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1481 memset(buf, 0, sizeof(buf));
1482 nlh = (struct nlmsghdr *)buf;
1483 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1484 nlh->nlmsg_type = GENL_ID_CTRL;
1485 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1486 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1487 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1488 genl->cmd = CTRL_CMD_GETFAMILY;
1490 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1491 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
/* The callback writes the family ID into `id`. */
1493 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1495 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1499 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1504 * Get Devlink family ID.
1506 * @param[in] nlsk_fd
1507 * Netlink socket file descriptor.
1510 * ID >= 0 on success, a negative errno value
1511 * otherwise and rte_errno is set.
/*
 * Convenience wrapper: look up the generic Netlink family ID registered
 * under DEVLINK_GENL_NAME.
 */
1515 mlx5_nl_devlink_family_id_get(int nlsk_fd)
1517 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME);
1521 * Parse Netlink message to retrieve the ROCE enable status.
1524 * Pointer to Netlink Message Header.
1526 * PMD data registered with this callback.
1529 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_nl_recv() callback: walk the attributes of a Devlink parameter
 * answer, handling the nested PARAM / VALUES_LIST / VALUE attributes and
 * the PARAM_VALUE_DATA attribute.
 */
1532 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
1537 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
/* The first attribute follows the Netlink and generic Netlink headers. */
1538 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1539 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
/*
 * NOTE(review): nla->nla_len is read before `nla < tail` is tested, so the
 * final iteration inspects memory at or beyond the message end.
 */
1541 while (nla->nla_len && nla < tail) {
1542 switch (nla->nla_type) {
1543 /* Expected nested attributes case. */
1544 case DEVLINK_ATTR_PARAM:
1545 case DEVLINK_ATTR_PARAM_VALUES_LIST:
1546 case DEVLINK_ATTR_PARAM_VALUE:
1550 case DEVLINK_ATTR_PARAM_VALUE_DATA:
/* Skip over the whole (aligned) attribute to reach its successor. */
1554 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len));
1562 * Get ROCE enable status through Netlink.
1564 * @param[in] nlsk_fd
1565 * Netlink socket file descriptor.
1566 * @param[in] family_id
1567 * The Devlink family ID.
1569 * The device PCI address.
1570 * @param[out] enable
1571 * Where to store the enable status.
1574 * 0 on success and @p enable is updated, a negative errno value otherwise
1575 * and rte_errno is set.
/*
 * Query the devlink "enable_roce" parameter of a PCI device.
 *
 * Builds a DEVLINK_CMD_PARAM_GET request for bus "pci", device @p pci_addr
 * and parameter name "enable_roce", sends it with mlx5_nl_send() and lets
 * mlx5_nl_recv() feed the reply to mlx5_nl_roce_cb with &cur_en as argument.
 *
 * NOTE(review): the rest of the parameter list, the declarations of `ret`
 * and `cur_en`, and the hand-off of the result to the caller are not
 * visible in this excerpt - confirm against the full file.
 */
1578 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1581 struct nlmsghdr *nlh;
1582 struct genlmsghdr *genl;
/* The same sequence number is passed to both the send and the receive call. */
1583 uint32_t sn = MLX5_NL_SN_GENERATE;
/*
 * Request buffer: both headers plus room for four attributes (header and
 * payload each); only three attributes are appended below.
 */
1586 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1587 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1588 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1589 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1591 memset(buf, 0, sizeof(buf));
1592 nlh = (struct nlmsghdr *)buf;
1593 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1594 nlh->nlmsg_type = family_id;
1595 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* Place the generic Netlink header at the current tail, then account for it. */
1596 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1597 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1598 genl->cmd = DEVLINK_CMD_PARAM_GET;
1599 genl->version = DEVLINK_GENL_VERSION;
/* Literal lengths (4, 12) count the terminating NUL of each string. */
1600 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1601 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1602 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1603 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1605 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1607 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1612 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1613 cur_en ? "en" : "dis", pci_addr);
1618 * Reload mlx5 device kernel driver through Netlink.
1620 * @param[in] nlsk_fd
1621 * Netlink socket file descriptor.
1622 * @param[in] family_id
1623 * The Devlink family ID.
1625 * The device PCI address.
1630 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Ask devlink to reload the driver of a PCI device.
 *
 * Builds a DEVLINK_CMD_RELOAD request for bus "pci" and device @p pci_addr,
 * sends it with mlx5_nl_send() and then calls mlx5_nl_recv() with no
 * callback, so only the status returned by the receive call is used.
 *
 * NOTE(review): the declaration of `ret` and the statements between the
 * send/recv/log calls are not visible in this excerpt.
 */
1633 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1635 struct nlmsghdr *nlh;
1636 struct genlmsghdr *genl;
/* The same sequence number is passed to both the send and the receive call. */
1637 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Request buffer: both headers plus room for the two attributes added below. */
1639 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1640 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1641 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1642 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1644 memset(buf, 0, sizeof(buf));
1645 nlh = (struct nlmsghdr *)buf;
1646 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1647 nlh->nlmsg_type = family_id;
1648 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* Place the generic Netlink header at the current tail, then account for it. */
1649 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1650 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1651 genl->cmd = DEVLINK_CMD_RELOAD;
1652 genl->version = DEVLINK_GENL_VERSION;
/* The literal length 4 counts the terminating NUL of "pci". */
1653 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1654 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1655 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1657 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1659 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1663 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1669 * Set ROCE enable status through Netlink.
1671 * @param[in] nlsk_fd
1672 * Netlink socket file descriptor.
1673 * @param[in] family_id
1674 * The Devlink family ID.
1676 * The device PCI address.
1677 * @param[in] enable
1678 * The enable status to set.
1681 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Set the devlink "enable_roce" parameter of a PCI device, then reload the
 * driver.
 *
 * Builds a DEVLINK_CMD_PARAM_SET request for bus "pci", device @p pci_addr
 * and parameter name "enable_roce", with configuration mode
 * DEVLINK_PARAM_CMODE_DRIVERINIT and parameter type NLA_FLAG. After the
 * send/receive exchange it finishes by returning the result of
 * mlx5_nl_driver_reload().
 *
 * NOTE(review): the rest of the parameter list (an `enable` value is read
 * below), the declaration of `ret` and the control flow around the
 * send/recv/log calls are not visible in this excerpt.
 */
1684 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1687 struct nlmsghdr *nlh;
1688 struct genlmsghdr *genl;
/* The same sequence number is passed to both the send and the receive call. */
1689 uint32_t sn = MLX5_NL_SN_GENERATE;
/* Request buffer: both headers plus room for the six attributes added below. */
1691 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1692 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1693 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1694 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
/* One-byte attribute payloads; they are passed by address to nl_attr_put(). */
1695 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1696 uint8_t ptype = NLA_FLAG;
1699 memset(buf, 0, sizeof(buf));
1700 nlh = (struct nlmsghdr *)buf;
1701 nlh->nlmsg_len = sizeof(struct nlmsghdr);
/* The message type is the devlink family ID supplied by the caller. */
1702 nlh->nlmsg_type = family_id;
1703 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
/* Place the generic Netlink header at the current tail, then account for it. */
1704 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1705 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1706 genl->cmd = DEVLINK_CMD_PARAM_SET;
1707 genl->version = DEVLINK_GENL_VERSION;
/* Literal lengths (4, 12) count the terminating NUL of each string. */
1708 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1709 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1710 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1711 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1712 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
/*
 * Zero-length value attribute.
 * NOTE(review): a line appears to be missing just before this call in the
 * excerpt; for a flag-typed parameter the presence of this attribute
 * presumably encodes "true", so the call is likely conditional on `enable`
 * - confirm against the full file.
 */
1714 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1715 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
/* No callback: only the status returned by the receive call is used. */
1717 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1719 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1720 " %d.", enable ? "en" : "dis", pci_addr, ret);
1723 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1724 pci_addr, enable ? "en" : "dis");
1725 /* Now, need to reload the driver. */
1726 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);