1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
7 #include <linux/if_link.h>
8 #include <linux/rtnetlink.h>
9 #include <linux/genetlink.h>
11 #include <rdma/rdma_netlink.h>
17 #include <sys/socket.h>
20 #include <rte_errno.h>
23 #include "mlx5_common_utils.h"
24 #include "mlx5_malloc.h"
26 #include <linux/devlink.h>
30 /* Size of the buffer to receive kernel messages */
31 #define MLX5_NL_BUF_SIZE (32 * 1024)
32 /* Send buffer size for the Netlink socket */
33 #define MLX5_SEND_BUF_SIZE 32768
34 /* Receive buffer size for the Netlink socket */
35 #define MLX5_RECV_BUF_SIZE 32768
37 /** Parameters of VLAN devices created by driver. */
38 #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"
40 * Define NDA_RTA as defined in iproute2 sources.
42 * see in iproute2 sources file include/libnetlink.h
/* Yields a struct rtattr pointer located NLMSG_ALIGN(sizeof(struct ndmsg)) bytes past r. */
45 #define MLX5_NDA_RTA(r) \
46 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
49 * Define NLMSG_TAIL as defined in iproute2 sources.
51 * see in iproute2 sources file include/libnetlink.h
/* Yields a struct rtattr pointer located at the aligned nlmsg_len offset from the start of nmsg. */
54 #define NLMSG_TAIL(nmsg) \
55 ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
58 * The following definitions are normally found in rdma/rdma_netlink.h,
59 * however they are so recent that most systems do not expose them yet.
61 #ifndef HAVE_RDMA_NL_NLDEV
62 #define RDMA_NL_NLDEV 5
64 #ifndef HAVE_RDMA_NLDEV_CMD_GET
65 #define RDMA_NLDEV_CMD_GET 1
67 #ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
68 #define RDMA_NLDEV_CMD_PORT_GET 5
70 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
71 #define RDMA_NLDEV_ATTR_DEV_INDEX 1
73 #ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
74 #define RDMA_NLDEV_ATTR_DEV_NAME 2
76 #ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
77 #define RDMA_NLDEV_ATTR_PORT_INDEX 3
79 #ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
80 #define RDMA_NLDEV_ATTR_NDEV_INDEX 50
83 /* These are normally found in linux/if_link.h. */
84 #ifndef HAVE_IFLA_NUM_VF
85 #define IFLA_NUM_VF 21
87 #ifndef HAVE_IFLA_EXT_MASK
88 #define IFLA_EXT_MASK 29
90 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
91 #define IFLA_PHYS_SWITCH_ID 36
93 #ifndef HAVE_IFLA_PHYS_PORT_NAME
94 #define IFLA_PHYS_PORT_NAME 38
98 * Some Devlink defines may be missing in old kernel versions,
99 * so provide fallback definitions for the ones used here.
101 #ifndef DEVLINK_GENL_NAME
102 #define DEVLINK_GENL_NAME "devlink"
104 #ifndef DEVLINK_GENL_VERSION
105 #define DEVLINK_GENL_VERSION 1
107 #ifndef DEVLINK_ATTR_BUS_NAME
108 #define DEVLINK_ATTR_BUS_NAME 1
110 #ifndef DEVLINK_ATTR_DEV_NAME
111 #define DEVLINK_ATTR_DEV_NAME 2
113 #ifndef DEVLINK_ATTR_PARAM
114 #define DEVLINK_ATTR_PARAM 80
116 #ifndef DEVLINK_ATTR_PARAM_NAME
117 #define DEVLINK_ATTR_PARAM_NAME 81
119 #ifndef DEVLINK_ATTR_PARAM_TYPE
120 #define DEVLINK_ATTR_PARAM_TYPE 83
122 #ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
123 #define DEVLINK_ATTR_PARAM_VALUES_LIST 84
125 #ifndef DEVLINK_ATTR_PARAM_VALUE
126 #define DEVLINK_ATTR_PARAM_VALUE 85
128 #ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
129 #define DEVLINK_ATTR_PARAM_VALUE_DATA 86
131 #ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
132 #define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
134 #ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
135 #define DEVLINK_PARAM_CMODE_DRIVERINIT 1
137 #ifndef DEVLINK_CMD_RELOAD
138 #define DEVLINK_CMD_RELOAD 37
140 #ifndef DEVLINK_CMD_PARAM_GET
141 #define DEVLINK_CMD_PARAM_GET 38
143 #ifndef DEVLINK_CMD_PARAM_SET
144 #define DEVLINK_CMD_PARAM_SET 39
150 /* Add/remove MAC address through Netlink */
151 struct mlx5_nl_mac_addr {
152 struct rte_ether_addr (*mac)[];
153 /**< MAC address handled by the device. */
154 int mac_n; /**< Number of addresses in the array. */
/*
 * Bits stored in the flags field of struct mlx5_nl_ifindex_data; each one
 * is set by mlx5_nl_cmdget_cb() when the corresponding attribute is seen
 * (for IB_NAME: when the device name attribute matches the requested name).
 */
157 #define MLX5_NL_CMD_GET_IB_NAME (1 << 0)
158 #define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
159 #define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
160 #define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
162 /** Data structure used by mlx5_nl_cmdget_cb(). */
163 struct mlx5_nl_ifindex_data {
164 const char *name; /**< IB device name (in). */
165 uint32_t flags; /**< found attribute flags (out). */
166 uint32_t ibindex; /**< IB device index (out). */
167 uint32_t ifindex; /**< Network interface index (out). */
168 uint32_t portnum; /**< IB device max port number (out). */
173 /* Generate Netlink sequence number. */
/* Each expansion atomically increments atomic_sn (relaxed ordering) and evaluates to the new value. */
174 #define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED)
177 * Opens a Netlink socket.
180 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
183 * A file descriptor on success, a negative errno value otherwise and
187 mlx5_nl_init(int protocol)
190 int sndbuf_size = MLX5_SEND_BUF_SIZE;
191 int rcvbuf_size = MLX5_RECV_BUF_SIZE;
192 struct sockaddr_nl local = {
193 .nl_family = AF_NETLINK,
197 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
202 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int));
207 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int));
212 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
224 * Send a request message to the kernel on the Netlink socket.
227 * Netlink socket file descriptor.
229 * The Netlink message sent to the kernel.
233 * Pointer to the request structure.
235 * Length of the request in bytes.
238 * The number of sent bytes on success, a negative errno value otherwise and
242 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
245 struct sockaddr_nl sa = {
246 .nl_family = AF_NETLINK,
248 struct iovec iov[2] = {
249 { .iov_base = nh, .iov_len = sizeof(*nh), },
250 { .iov_base = req, .iov_len = len, },
252 struct msghdr msg = {
254 .msg_namelen = sizeof(sa),
260 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
262 send_bytes = sendmsg(nlsk_fd, &msg, 0);
263 if (send_bytes < 0) {
271 * Send a message to the kernel on the Netlink socket.
274 * The Netlink socket file descriptor used for communication.
276 * The Netlink message sent to the kernel.
281 * The number of sent bytes on success, a negative errno value otherwise and
285 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
287 struct sockaddr_nl sa = {
288 .nl_family = AF_NETLINK,
292 .iov_len = nh->nlmsg_len,
294 struct msghdr msg = {
296 .msg_namelen = sizeof(sa),
302 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
304 send_bytes = sendmsg(nlsk_fd, &msg, 0);
305 if (send_bytes < 0) {
313 * Receive a message from the kernel on the Netlink socket, following
317 * The Netlink socket file descriptor used for communication.
321 * The callback function to call for each Netlink message received.
322 * @param[in, out] arg
323 * Custom arguments for the callback.
326 * 0 on success, a negative errno value otherwise and rte_errno is set.
329 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
332 struct sockaddr_nl sa;
333 void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY);
336 .iov_len = MLX5_RECV_BUF_SIZE,
338 struct msghdr msg = {
340 .msg_namelen = sizeof(sa),
342 /* One message at a time */
357 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
358 if (recv_bytes == -1) {
363 nh = (struct nlmsghdr *)buf;
364 } while (nh->nlmsg_seq != sn);
366 NLMSG_OK(nh, (unsigned int)recv_bytes);
367 nh = NLMSG_NEXT(nh, recv_bytes)) {
368 if (nh->nlmsg_type == NLMSG_ERROR) {
369 struct nlmsgerr *err_data = NLMSG_DATA(nh);
371 if (err_data->error < 0) {
372 rte_errno = -err_data->error;
380 /* Multi-part msgs and their trailing DONE message. */
381 if (nh->nlmsg_flags & NLM_F_MULTI) {
382 if (nh->nlmsg_type == NLMSG_DONE) {
401 * Parse Netlink message to retrieve the bridge MAC address.
404 * Pointer to Netlink Message Header.
406 * PMD data registered with this callback.
409 * 0 on success, a negative errno value otherwise and rte_errno is set.
412 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
414 struct mlx5_nl_mac_addr *data = arg;
415 struct ndmsg *r = NLMSG_DATA(nh);
416 struct rtattr *attribute;
419 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
420 for (attribute = MLX5_NDA_RTA(r);
421 RTA_OK(attribute, len);
422 attribute = RTA_NEXT(attribute, len)) {
423 if (attribute->rta_type == NDA_LLADDR) {
424 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
426 "not enough room to finalize the"
431 #ifdef RTE_LIBRTE_MLX5_DEBUG
432 char m[RTE_ETHER_ADDR_FMT_SIZE];
434 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
435 RTA_DATA(attribute));
436 DRV_LOG(DEBUG, "bridge MAC address %s", m);
438 memcpy(&(*data->mac)[data->mac_n++],
439 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
446 * Get bridge MAC addresses.
449 * Netlink socket file descriptor.
450 * @param[in] iface_idx
451 * Net device interface index.
453 * Pointer to the array table of MAC addresses to fill.
454 * It must have room for MLX5_MAX_MAC_ADDRESSES entries.
456 * Number of entries filled in MAC array.
459 * 0 on success, a negative errno value otherwise and rte_errno is set.
462 mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
463 struct rte_ether_addr (*mac)[], int *mac_n)
467 struct ifinfomsg ifm;
470 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
471 .nlmsg_type = RTM_GETNEIGH,
472 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
475 .ifi_family = PF_BRIDGE,
476 .ifi_index = iface_idx,
479 struct mlx5_nl_mac_addr data = {
483 uint32_t sn = MLX5_NL_SN_GENERATE;
488 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
489 sizeof(struct ifinfomsg));
492 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
498 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
499 iface_idx, strerror(rte_errno));
504 * Modify the MAC address neighbour table with Netlink.
507 * Netlink socket file descriptor.
508 * @param[in] iface_idx
509 * Net device interface index.
511 * MAC address to consider.
513 * 1 to add the MAC address, 0 to remove the MAC address.
516 * 0 on success, a negative errno value otherwise and rte_errno is set.
519 mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
520 struct rte_ether_addr *mac, int add)
526 uint8_t buffer[RTE_ETHER_ADDR_LEN];
529 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
530 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
531 NLM_F_EXCL | NLM_F_ACK,
532 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
535 .ndm_family = PF_BRIDGE,
536 .ndm_state = NUD_NOARP | NUD_PERMANENT,
537 .ndm_ifindex = iface_idx,
538 .ndm_flags = NTF_SELF,
541 .rta_type = NDA_LLADDR,
542 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
545 uint32_t sn = MLX5_NL_SN_GENERATE;
550 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
551 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
552 RTA_ALIGN(req.rta.rta_len);
553 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
556 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
561 #ifdef RTE_LIBRTE_MLX5_DEBUG
563 char m[RTE_ETHER_ADDR_FMT_SIZE];
565 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
567 "Interface %u cannot %s MAC address %s %s",
569 add ? "add" : "remove", m, strerror(rte_errno));
576 * Modify the VF MAC address neighbour table with Netlink.
579 * Netlink socket file descriptor.
580 * @param[in] iface_idx
581 * Net device interface index.
583 * MAC address to consider.
588 * 0 on success, a negative errno value otherwise and rte_errno is set.
591 mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
592 struct rte_ether_addr *mac, int vf_index)
597 struct ifinfomsg ifm;
598 struct rtattr vf_list_rta;
599 struct rtattr vf_info_rta;
600 struct rtattr vf_mac_rta;
601 struct ifla_vf_mac ivm;
604 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
605 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
606 .nlmsg_type = RTM_BASE,
609 .ifi_index = iface_idx,
612 .rta_type = IFLA_VFINFO_LIST,
613 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
616 .rta_type = IFLA_VF_INFO,
617 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
620 .rta_type = IFLA_VF_MAC,
623 struct ifla_vf_mac ivm = {
626 uint32_t sn = MLX5_NL_SN_GENERATE;
628 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
629 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
631 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
632 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
633 RTA_ALIGN(req.vf_list_rta.rta_len) +
634 RTA_ALIGN(req.vf_info_rta.rta_len) +
635 RTA_ALIGN(req.vf_mac_rta.rta_len);
636 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
638 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
643 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
646 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
652 "representor %u cannot set VF MAC address "
653 "%02X:%02X:%02X:%02X:%02X:%02X : %s",
655 mac->addr_bytes[0], mac->addr_bytes[1],
656 mac->addr_bytes[2], mac->addr_bytes[3],
657 mac->addr_bytes[4], mac->addr_bytes[5],
658 strerror(rte_errno));
666 * Netlink socket file descriptor.
667 * @param[in] iface_idx
668 * Net device interface index.
670 * BITFIELD_DECLARE array to store the mac.
672 * MAC address to register.
677 * 0 on success, a negative errno value otherwise and rte_errno is set.
680 mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
681 uint64_t *mac_own, struct rte_ether_addr *mac,
686 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
688 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
689 if (index >= MLX5_MAX_MAC_ADDRESSES)
692 BITFIELD_SET(mac_own, index);
700 * Remove a MAC address.
703 * Netlink socket file descriptor.
704 * @param[in] iface_idx
705 * Net device interface index.
707 * BITFIELD_DECLARE array to store the mac.
709 * MAC address to remove.
714 * 0 on success, a negative errno value otherwise and rte_errno is set.
717 mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
718 struct rte_ether_addr *mac, uint32_t index)
720 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
721 if (index >= MLX5_MAX_MAC_ADDRESSES)
724 BITFIELD_RESET(mac_own, index);
725 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
729 * Synchronize Netlink bridge table to the internal table.
732 * Netlink socket file descriptor.
733 * @param[in] iface_idx
734 * Net device interface index.
736 * Mac addresses array to sync.
738 * @p mac_addrs array size.
741 mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
742 struct rte_ether_addr *mac_addrs, int n)
744 struct rte_ether_addr macs[n];
749 memset(macs, 0, n * sizeof(macs[0]));
750 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
753 for (i = 0; i != macs_n; ++i) {
756 /* Verify the address is not in the array yet. */
757 for (j = 0; j != n; ++j)
758 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
762 if (rte_is_multicast_ether_addr(&macs[i])) {
763 /* Find the first entry available. */
764 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
765 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
766 mac_addrs[j] = macs[i];
771 /* Find the first entry available. */
772 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
773 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
774 mac_addrs[j] = macs[i];
783 * Flush all added MAC addresses.
786 * Netlink socket file descriptor.
787 * @param[in] iface_idx
788 * Net device interface index.
789 * @param[in] mac_addrs
790 * Mac addresses array to flush.
792 * @p mac_addrs array size.
794 * BITFIELD_DECLARE array to store the mac.
797 mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
798 struct rte_ether_addr *mac_addrs, int n,
803 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
806 for (i = n - 1; i >= 0; --i) {
807 struct rte_ether_addr *m = &mac_addrs[i];
809 if (BITFIELD_ISSET(mac_own, i))
810 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
816 * Enable promiscuous / all multicast mode through Netlink.
819 * Netlink socket file descriptor.
820 * @param[in] iface_idx
821 * Net device interface index.
823 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
825 * Nonzero to enable, disable otherwise.
828 * 0 on success, a negative errno value otherwise and rte_errno is set.
831 mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
836 struct ifinfomsg ifi;
839 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
840 .nlmsg_type = RTM_NEWLINK,
841 .nlmsg_flags = NLM_F_REQUEST,
844 .ifi_flags = enable ? flags : 0,
846 .ifi_index = iface_idx,
849 uint32_t sn = MLX5_NL_SN_GENERATE;
852 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
855 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
862 * Enable promiscuous mode through Netlink.
865 * Netlink socket file descriptor.
866 * @param[in] iface_idx
867 * Net device interface index.
869 * Nonzero to enable, disable otherwise.
872 * 0 on success, a negative errno value otherwise and rte_errno is set.
875 mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
877 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
881 "Interface %u cannot %s promisc mode: Netlink error %s",
882 iface_idx, enable ? "enable" : "disable",
883 strerror(rte_errno));
888 * Enable all multicast mode through Netlink.
891 * Netlink socket file descriptor.
892 * @param[in] iface_idx
893 * Net device interface index.
895 * Nonzero to enable, disable otherwise.
898 * 0 on success, a negative errno value otherwise and rte_errno is set.
901 mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
903 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
908 "Interface %u cannot %s allmulti : Netlink error %s",
909 iface_idx, enable ? "enable" : "disable",
910 strerror(rte_errno));
915 * Process network interface information from Netlink message.
918 * Pointer to Netlink message header.
920 * Opaque data pointer for this callback.
923 * 0 on success, a negative errno value otherwise and rte_errno is set.
926 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
928 struct mlx5_nl_ifindex_data *data = arg;
929 struct mlx5_nl_ifindex_data local = {
932 size_t off = NLMSG_HDRLEN;
934 if (nh->nlmsg_type !=
935 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
937 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
939 while (off < nh->nlmsg_len) {
940 struct nlattr *na = (void *)((uintptr_t)nh + off);
941 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
943 if (na->nla_len > nh->nlmsg_len - off)
945 switch (na->nla_type) {
946 case RDMA_NLDEV_ATTR_DEV_INDEX:
947 local.ibindex = *(uint32_t *)payload;
948 local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
950 case RDMA_NLDEV_ATTR_DEV_NAME:
951 if (!strcmp(payload, data->name))
952 local.flags |= MLX5_NL_CMD_GET_IB_NAME;
954 case RDMA_NLDEV_ATTR_NDEV_INDEX:
955 local.ifindex = *(uint32_t *)payload;
956 local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
958 case RDMA_NLDEV_ATTR_PORT_INDEX:
959 local.portnum = *(uint32_t *)payload;
960 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
965 off += NLA_ALIGN(na->nla_len);
968 * It is possible to have multiple messages for all
969 * Infiniband devices in the system with appropriate name.
970 * So we should gather parameters locally and copy to
971 * query context only in case of coinciding device name.
973 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
974 data->flags = local.flags;
975 data->ibindex = local.ibindex;
976 data->ifindex = local.ifindex;
977 data->portnum = local.portnum;
986 * Get index of network interface associated with some IB device.
988 * This is the only somewhat safe method to avoid resorting to heuristics
989 * when faced with port representors. Unfortunately it requires at least
993 * Netlink socket of the RDMA kind (NETLINK_RDMA).
997 * IB device port index, starting from 1
999 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
1003 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1005 struct mlx5_nl_ifindex_data data = {
1008 .ibindex = 0, /* Determined during first pass. */
1009 .ifindex = 0, /* Determined during second pass. */
1013 uint8_t buf[NLMSG_HDRLEN +
1014 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
1015 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1018 .nlmsg_len = NLMSG_LENGTH(0),
1019 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1020 RDMA_NLDEV_CMD_GET),
1021 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1025 uint32_t sn = MLX5_NL_SN_GENERATE;
1028 ret = mlx5_nl_send(nl, &req.nh, sn);
1031 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1034 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1035 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
1038 sn = MLX5_NL_SN_GENERATE;
1039 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1040 RDMA_NLDEV_CMD_PORT_GET);
1041 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1042 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
1043 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1044 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
1045 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1046 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1047 &data.ibindex, sizeof(data.ibindex));
1048 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1049 na->nla_len = NLA_HDRLEN + sizeof(pindex);
1050 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1051 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1052 &pindex, sizeof(pindex));
1053 ret = mlx5_nl_send(nl, &req.nh, sn);
1056 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1059 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1060 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1061 !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1064 return data.ifindex;
1071 * Get the number of physical ports of given IB device.
1074 * Netlink socket of the RDMA kind (NETLINK_RDMA).
1079 * A valid (nonzero) number of ports on success, 0 otherwise
1080 * and rte_errno is set.
1083 mlx5_nl_portnum(int nl, const char *name)
1085 struct mlx5_nl_ifindex_data data = {
1091 struct nlmsghdr req = {
1092 .nlmsg_len = NLMSG_LENGTH(0),
1093 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1094 RDMA_NLDEV_CMD_GET),
1095 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1097 uint32_t sn = MLX5_NL_SN_GENERATE;
1100 ret = mlx5_nl_send(nl, &req, sn);
1103 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1106 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1107 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1108 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1114 return data.portnum;
1118 * Analyze gathered port parameters via Netlink to recognize master
1119 * and representor devices for E-Switch configuration.
1121 * @param[in] num_vf_set
1122 * Whether the number-of-VFs port attribute is present.
1123 * @param[inout] switch_info
1124 * Port information, including port name as a number and port name
1125 * type if recognized
1128 * master and representor flags are set in switch_info according to
1129 * recognized parameters (if any).
1132 mlx5_nl_check_switch_info(bool num_vf_set,
1133 struct mlx5_switch_info *switch_info)
1135 switch (switch_info->name_type) {
1136 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1138 * Name is not recognized, assume the master,
1139 * check the number of VFs key presence.
1141 switch_info->master = num_vf_set;
1143 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1145 * Name is not set, this assumes the legacy naming
1146 * schema for master, just check if there is a
1147 * number of VFs key.
1149 switch_info->master = num_vf_set;
1151 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1152 /* New uplink naming schema recognized. */
1153 switch_info->master = 1;
1155 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1156 /* Legacy representors naming schema. */
1157 switch_info->representor = !num_vf_set;
1159 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1161 case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1163 case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1164 /* New representors naming schema. */
1165 switch_info->representor = 1;
1171 * Process switch information from Netlink message.
1174 * Pointer to Netlink message header.
1176 * Opaque data pointer for this callback.
1179 * 0 on success, a negative errno value otherwise and rte_errno is set.
1182 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1184 struct mlx5_switch_info info = {
1187 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1191 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1192 bool switch_id_set = false;
1193 bool num_vf_set = false;
1195 if (nh->nlmsg_type != RTM_NEWLINK)
1197 while (off < nh->nlmsg_len) {
1198 struct rtattr *ra = (void *)((uintptr_t)nh + off);
1199 void *payload = RTA_DATA(ra);
1202 if (ra->rta_len > nh->nlmsg_len - off)
1204 switch (ra->rta_type) {
1208 case IFLA_PHYS_PORT_NAME:
1209 mlx5_translate_port_name((char *)payload, &info);
1211 case IFLA_PHYS_SWITCH_ID:
1213 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1214 info.switch_id <<= 8;
1215 info.switch_id |= ((uint8_t *)payload)[i];
1217 switch_id_set = true;
1220 off += RTA_ALIGN(ra->rta_len);
1222 if (switch_id_set) {
1223 /* We have some E-Switch configuration. */
1224 mlx5_nl_check_switch_info(num_vf_set, &info);
1226 MLX5_ASSERT(!(info.master && info.representor));
1227 memcpy(arg, &info, sizeof(info));
1235 * Get switch information associated with network interface.
1238 * Netlink socket of the ROUTE kind (NETLINK_ROUTE).
1240 * Network interface index.
1242 * Switch information object, populated in case of success.
1245 * 0 on success, a negative errno value otherwise and rte_errno is set.
1248 mlx5_nl_switch_info(int nl, unsigned int ifindex,
1249 struct mlx5_switch_info *info)
1253 struct ifinfomsg info;
1258 .nlmsg_len = NLMSG_LENGTH
1260 RTA_LENGTH(sizeof(uint32_t))),
1261 .nlmsg_type = RTM_GETLINK,
1262 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1265 .ifi_family = AF_UNSPEC,
1266 .ifi_index = ifindex,
1269 .rta_type = IFLA_EXT_MASK,
1270 .rta_len = RTA_LENGTH(sizeof(int32_t)),
1272 .extmask = RTE_LE32(1),
1274 uint32_t sn = MLX5_NL_SN_GENERATE;
1277 ret = mlx5_nl_send(nl, &req.nh, sn);
1279 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
1280 if (info->master && info->representor) {
1281 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1282 " and as representor", ifindex);
1290 * Delete VLAN network device by ifindex.
1293 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1294 * @param[in] ifindex
1295 * Interface index of network device to delete.
1298 mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1301 uint32_t sn = MLX5_NL_SN_GENERATE;
1305 struct ifinfomsg info;
1308 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1309 .nlmsg_type = RTM_DELLINK,
1310 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1313 .ifi_family = AF_UNSPEC,
1314 .ifi_index = ifindex,
1319 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
1321 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1323 DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1324 " ifindex %u, %d", ifindex, ret);
1328 /* Set of subroutines to build Netlink message. */
1329 static struct nlattr *
1330 nl_msg_tail(struct nlmsghdr *nlh)
1332 return (struct nlattr *)
1333 (((uint8_t *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
1337 nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
1339 struct nlattr *nla = nl_msg_tail(nlh);
1341 nla->nla_type = type;
1342 nla->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
1343 nlh->nlmsg_len += NLMSG_ALIGN(nla->nla_len);
1346 memcpy((uint8_t *)nla + sizeof(struct nlattr), data, alen);
1349 static struct nlattr *
1350 nl_attr_nest_start(struct nlmsghdr *nlh, int type)
1352 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
1354 nl_attr_put(nlh, type, NULL, 0);
1359 nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
1361 nest->nla_len = (uint8_t *)nl_msg_tail(nlh) - (uint8_t *)nest;
1365 * Create network VLAN device with specified VLAN tag.
1368 * Context object initialized by mlx5_nl_vlan_vmwa_init().
1369 * @param[in] ifindex
1370 * Base network interface index.
1372 * VLAN tag for VLAN network device to create.
1375 mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1376 uint32_t ifindex, uint16_t tag)
1378 struct nlmsghdr *nlh;
1379 struct ifinfomsg *ifm;
1380 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
1383 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1384 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1385 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1386 NLMSG_ALIGN(sizeof(uint32_t)) +
1387 NLMSG_ALIGN(sizeof(name)) +
1388 NLMSG_ALIGN(sizeof("vlan")) +
1389 NLMSG_ALIGN(sizeof(uint32_t)) +
1390 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1391 struct nlattr *na_info;
1392 struct nlattr *na_vlan;
1393 uint32_t sn = MLX5_NL_SN_GENERATE;
1396 memset(buf, 0, sizeof(buf));
1397 nlh = (struct nlmsghdr *)buf;
1398 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1399 nlh->nlmsg_type = RTM_NEWLINK;
1400 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1401 NLM_F_EXCL | NLM_F_ACK;
1402 ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1403 nlh->nlmsg_len += sizeof(struct ifinfomsg);
1404 ifm->ifi_family = AF_UNSPEC;
1407 ifm->ifi_flags = IFF_UP;
1408 ifm->ifi_change = 0xffffffff;
1409 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
1410 ret = snprintf(name, sizeof(name), "%s.%u.%u",
1411 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
1412 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
1413 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1414 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1415 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1416 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1417 nl_attr_nest_end(nlh, na_vlan);
1418 nl_attr_nest_end(nlh, na_info);
1419 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1420 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1422 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1424 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1427 /* Try to get ifindex of created or pre-existing device. */
1428 ret = if_nametoindex(name);
1430 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1438 * Parse Netlink message to retrieve the general family ID.
1441 * Pointer to Netlink Message Header.
1443 * PMD data registered with this callback.
1446 * 0 on success, a negative errno value otherwise and rte_errno is set.
1449 mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
1452 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
1453 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1454 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
1456 for (; nla->nla_len && nla < tail;
1457 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len))) {
1458 if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
1459 *(uint16_t *)arg = *(uint16_t *)(nla + 1);
/* Per-attribute payload allowance used when sizing the on-stack request buffers of the generic Netlink requests below. */
1466 #define MLX5_NL_MAX_ATTR_SIZE 100
1468 * Get generic netlink family ID.
1470 * @param[in] nlsk_fd
1471 * Netlink socket file descriptor.
1476 * ID >= 0 on success, a negative errno value otherwise and
1477 * rte_errno is set.
1480 mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1482 struct nlmsghdr *nlh;
1483 struct genlmsghdr *genl;
1484 uint32_t sn = MLX5_NL_SN_GENERATE;
1485 int name_size = strlen(name) + 1;
1488 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1489 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1490 NLMSG_ALIGN(sizeof(struct nlattr)) +
1491 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1493 memset(buf, 0, sizeof(buf));
1494 nlh = (struct nlmsghdr *)buf;
1495 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1496 nlh->nlmsg_type = GENL_ID_CTRL;
1497 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1498 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1499 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1500 genl->cmd = CTRL_CMD_GETFAMILY;
1502 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1503 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1505 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1507 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1511 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1516 * Get Devlink family ID.
1518 * @param[in] nlsk_fd
1519 * Netlink socket file descriptor.
1522 * ID >= 0 on success, a negative errno value otherwise and
1523 * rte_errno is set.
1527 mlx5_nl_devlink_family_id_get(int nlsk_fd)
1529 return mlx5_nl_generic_family_id_get(nlsk_fd, DEVLINK_GENL_NAME);
1533 * Parse Netlink message to retrieve the ROCE enable status.
1536 * Pointer to Netlink Message Header.
1538 * PMD data registered with this callback.
1541 * 0 on success, a negative errno value otherwise and rte_errno is set.
1544 mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
1549 struct nlattr *tail = RTE_PTR_ADD(nh, nh->nlmsg_len);
1550 struct nlattr *nla = RTE_PTR_ADD(nh, NLMSG_ALIGN(sizeof(*nh)) +
1551 NLMSG_ALIGN(sizeof(struct genlmsghdr)));
1553 while (nla->nla_len && nla < tail) {
1554 switch (nla->nla_type) {
1555 /* Expected nested attributes case. */
1556 case DEVLINK_ATTR_PARAM:
1557 case DEVLINK_ATTR_PARAM_VALUES_LIST:
1558 case DEVLINK_ATTR_PARAM_VALUE:
1562 case DEVLINK_ATTR_PARAM_VALUE_DATA:
1566 nla = RTE_PTR_ADD(nla, NLMSG_ALIGN(nla->nla_len));
1574 * Get ROCE enable status through Netlink.
1576 * @param[in] nlsk_fd
1577 * Netlink socket file descriptor.
1578 * @param[in] family_id
1579 * the Devlink family ID.
1581 * The device PCI address.
1582 * @param[out] enable
1583 * Where to store the enable status.
1586 * 0 on success and @p enable is updated, a negative errno value otherwise
1587 * and rte_errno is set.
1590 mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1593 struct nlmsghdr *nlh;
1594 struct genlmsghdr *genl;
1595 uint32_t sn = MLX5_NL_SN_GENERATE;
1598 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1599 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1600 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1601 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1603 memset(buf, 0, sizeof(buf));
1604 nlh = (struct nlmsghdr *)buf;
1605 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1606 nlh->nlmsg_type = family_id;
1607 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1608 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1609 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1610 genl->cmd = DEVLINK_CMD_PARAM_GET;
1611 genl->version = DEVLINK_GENL_VERSION;
1612 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1613 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1614 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1615 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1617 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1619 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1624 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1625 cur_en ? "en" : "dis", pci_addr);
1630 * Reload mlx5 device kernel driver through Netlink.
1632 * @param[in] nlsk_fd
1633 * Netlink socket file descriptor.
1634 * @param[in] family_id
1635 * the Devlink family ID.
1637 * The device PCI address.
1638 * @param[out] enable
1639 * The enable status to set.
1642 * 0 on success, a negative errno value otherwise and rte_errno is set.
1645 mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1647 struct nlmsghdr *nlh;
1648 struct genlmsghdr *genl;
1649 uint32_t sn = MLX5_NL_SN_GENERATE;
1651 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1652 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1653 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1654 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1656 memset(buf, 0, sizeof(buf));
1657 nlh = (struct nlmsghdr *)buf;
1658 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1659 nlh->nlmsg_type = family_id;
1660 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1661 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1662 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1663 genl->cmd = DEVLINK_CMD_RELOAD;
1664 genl->version = DEVLINK_GENL_VERSION;
1665 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1666 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1667 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1669 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1671 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1675 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1681 * Set ROCE enable status through Netlink.
1683 * @param[in] nlsk_fd
1684 * Netlink socket file descriptor.
1685 * @param[in] family_id
1686 * the Devlink family ID.
1688 * The device PCI address.
1689 * @param[out] enable
1690 * The enable status to set.
1693 * 0 on success, a negative errno value otherwise and rte_errno is set.
1696 mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1699 struct nlmsghdr *nlh;
1700 struct genlmsghdr *genl;
1701 uint32_t sn = MLX5_NL_SN_GENERATE;
1703 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1704 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1705 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1706 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
1707 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1708 uint8_t ptype = NLA_FLAG;
1711 memset(buf, 0, sizeof(buf));
1712 nlh = (struct nlmsghdr *)buf;
1713 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1714 nlh->nlmsg_type = family_id;
1715 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1716 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1717 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1718 genl->cmd = DEVLINK_CMD_PARAM_SET;
1719 genl->version = DEVLINK_GENL_VERSION;
1720 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1721 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1722 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1723 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1724 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
1726 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1727 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1729 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1731 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1732 " %d.", enable ? "en" : "dis", pci_addr, ret);
1735 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1736 pci_addr, enable ? "en" : "dis");
1737 /* Now, need to reload the driver. */
1738 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);