1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
6 #include <linux/netlink.h>
7 #include <linux/rtnetlink.h>
11 #include "mlx5_utils.h"
13 /* Size of the buffer to receive kernel messages */
14 #define MLX5_NL_BUF_SIZE (32 * 1024)
15 /* Send buffer size for the Netlink socket */
/* Passed to setsockopt(SO_SNDBUF) by mlx5_nl_init(). */
16 #define MLX5_SEND_BUF_SIZE 32768
17 /* Receive buffer size for the Netlink socket */
/*
 * Passed to setsockopt(SO_RCVBUF) by mlx5_nl_init(); the same constant sizes
 * the receive buffer declared in mlx5_nl_recv().
 */
18 #define MLX5_RECV_BUF_SIZE 32768
21 * Define NDA_RTA as defined in iproute2 sources.
23 * see in iproute2 sources file include/libnetlink.h
/*
 * Given a pointer r to a struct ndmsg, yield a struct rtattr pointer located
 * NLMSG_ALIGN(sizeof(struct ndmsg)) bytes after r. The cast to char * makes
 * the offset a byte offset.
 */
26 #define MLX5_NDA_RTA(r) \
27 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
30 /* Add/remove MAC address through Netlink */
/*
 * Passed as the callback argument of mlx5_nl_recv() to mlx5_nl_mac_addr_cb(),
 * which appends addresses at index mac_n and increments mac_n.
 */
31 struct mlx5_nl_mac_addr {
/* Pointer to an array of ether_addr; the element count is not part of the type. */
32 struct ether_addr (*mac)[];
33 /**< MAC address handled by the device. */
34 int mac_n; /**< Number of addresses in the array. */
38 * Opens a Netlink socket.
41 * Netlink group value (e.g. RTMGRP_LINK).
44 * A file descriptor on success, a negative errno value otherwise and
/*
 * Create a NETLINK_ROUTE socket, apply the send and receive buffer sizes and
 * bind it to a local address carrying nl_groups.
 */
48 mlx5_nl_init(uint32_t nl_groups)
/* Sizes are kept in locals because setsockopt() takes them by address. */
51 int sndbuf_size = MLX5_SEND_BUF_SIZE;
52 int rcvbuf_size = MLX5_RECV_BUF_SIZE;
/* Local address carrying the caller's Netlink groups. */
53 struct sockaddr_nl local = {
54 .nl_family = AF_NETLINK,
55 .nl_groups = nl_groups,
/* Raw Netlink socket; SOCK_CLOEXEC sets close-on-exec on the descriptor. */
59 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
64 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int));
69 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int));
/* Attach the socket to the local address prepared above. */
74 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
86 * Send a request message to the kernel on the Netlink socket.
89 * Netlink socket file descriptor.
91 * The Netlink message sent to the kernel.
95 * Pointer to the request structure.
97 * Length of the request in bytes.
100 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send the header nh and the payload req to the kernel with a single
 * sendmsg() call over a two-entry iovec.
 */
104 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
/* Destination Netlink address. */
107 struct sockaddr_nl sa = {
108 .nl_family = AF_NETLINK,
/* Gather list: the fixed-size header first, then len bytes taken from req. */
110 struct iovec iov[2] = {
111 { .iov_base = nh, .iov_len = sizeof(*nh), },
112 { .iov_base = req, .iov_len = len, },
114 struct msghdr msg = {
116 .msg_namelen = sizeof(sa),
122 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
124 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result is the failure case. */
125 if (send_bytes < 0) {
133 * Send a message to the kernel on the Netlink socket.
136 * The Netlink socket file descriptor used for communication.
138 * The Netlink message sent to the kernel.
143 * The number of sent bytes on success, a negative errno value otherwise and
/*
 * Send an already assembled Netlink message to the kernel; the number of
 * bytes handed to sendmsg() is read from nh->nlmsg_len.
 */
147 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
/* Destination Netlink address. */
149 struct sockaddr_nl sa = {
150 .nl_family = AF_NETLINK,
/* The caller must have set nlmsg_len to the full message length. */
154 .iov_len = nh->nlmsg_len,
156 struct msghdr msg = {
158 .msg_namelen = sizeof(sa),
164 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
166 send_bytes = sendmsg(nlsk_fd, &msg, 0);
/* A negative sendmsg() result is the failure case. */
167 if (send_bytes < 0) {
175 * Receive a message from the kernel on the Netlink socket, following
179 * The Netlink socket file descriptor used for communication.
183 * The callback function to call for each Netlink message received.
184 * @param[in, out] arg
185 * Custom arguments for the callback.
188 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Read kernel replies from nlsk_fd and walk the Netlink messages they
 * contain. Callers in this file pass either a parsing callback
 * (mlx5_nl_mac_addr_cb) or NULL for cb.
 */
191 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
194 struct sockaddr_nl sa;
/* Receive buffer of MLX5_RECV_BUF_SIZE bytes with automatic storage. */
195 char buf[MLX5_RECV_BUF_SIZE];
198 .iov_len = sizeof(buf),
200 struct msghdr msg = {
202 .msg_namelen = sizeof(sa),
204 /* One message at a time */
215 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
/* recvmsg() signals failure with -1. */
216 if (recv_bytes == -1) {
/*
 * Loop exit condition: the first header found in buf must carry the
 * expected sequence number sn.
 */
220 nh = (struct nlmsghdr *)buf;
221 } while (nh->nlmsg_seq != sn);
/* Step through each message held in the recv_bytes bytes just read. */
223 NLMSG_OK(nh, (unsigned int)recv_bytes);
224 nh = NLMSG_NEXT(nh, recv_bytes)) {
225 if (nh->nlmsg_type == NLMSG_ERROR) {
226 struct nlmsgerr *err_data = NLMSG_DATA(nh);
/* A negative error field is stored in rte_errno with its sign flipped. */
228 if (err_data->error < 0) {
229 rte_errno = -err_data->error;
235 /* Multi-part msgs and their trailing DONE message. */
236 if (nh->nlmsg_flags & NLM_F_MULTI) {
237 if (nh->nlmsg_type == NLMSG_DONE)
252 * Parse Netlink message to retrieve the bridge MAC address.
255 * Pointer to Netlink Message Header.
257 * PMD data registered with this callback.
260 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Callback handed to mlx5_nl_recv(): scan the attributes of one neighbour
 * message and append every NDA_LLADDR payload to the array referenced by arg.
 */
263 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
/* arg is the struct mlx5_nl_mac_addr supplied by mlx5_nl_mac_addr_list(). */
265 struct mlx5_nl_mac_addr *data = arg;
266 struct ndmsg *r = NLMSG_DATA(nh);
267 struct rtattr *attribute;
/* Bytes left in the message once the ndmsg header is accounted for. */
270 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
/* Attribute walk: RTA_OK() bounds it, RTA_NEXT() advances and updates len. */
271 for (attribute = MLX5_NDA_RTA(r);
272 RTA_OK(attribute, len);
273 attribute = RTA_NEXT(attribute, len)) {
/* Only link-layer address attributes are consumed. */
274 if (attribute->rta_type == NDA_LLADDR) {
/* mac_n equal to MLX5_MAX_MAC_ADDRESSES is treated as "array full". */
275 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
277 "not enough room to finalize the"
/* NOTE(review): 18 presumably covers "XX:XX:XX:XX:XX:XX" plus NUL - confirm. */
285 ether_format_addr(m, 18, RTA_DATA(attribute));
286 DRV_LOG(DEBUG, "bridge MAC address %s", m);
/* Copy ETHER_ADDR_LEN bytes into slot mac_n, then advance mac_n. */
288 memcpy(&(*data->mac)[data->mac_n++],
289 RTA_DATA(attribute), ETHER_ADDR_LEN);
296 * Get bridge MAC addresses.
299 * Pointer to Ethernet device.
301 * Pointer to the array table of MAC addresses to fill.
302 * Its size should be of MLX5_MAX_MAC_ADDRESSES.
304 * Number of entries filled in MAC array.
307 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Issue an RTM_GETNEIGH dump for the PF_BRIDGE family on the device's
 * interface and let mlx5_nl_mac_addr_cb() collect the replies.
 */
310 mlx5_nl_mac_addr_list(struct rte_eth_dev *dev, struct ether_addr (*mac)[],
313 struct priv *priv = dev->data->dev_private;
/* Interface index placed in the request's ifi_index field below. */
314 int iface_idx = mlx5_ifindex(dev);
317 struct ifinfomsg ifm;
/* Header length covers the Netlink header plus one struct ifinfomsg. */
320 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
321 .nlmsg_type = RTM_GETNEIGH,
322 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
325 .ifi_family = PF_BRIDGE,
326 .ifi_index = iface_idx,
329 struct mlx5_nl_mac_addr data = {
/* Sequence number for this exchange; the per-device counter is post-incremented. */
335 uint32_t sn = priv->nl_sn++;
/* A socket value of -1 is special-cased before any request is sent. */
337 if (priv->nl_socket == -1)
339 fd = priv->nl_socket;
/* Header and body are passed separately to mlx5_nl_request(). */
340 ret = mlx5_nl_request(fd, &req.hdr, sn, &req.ifm,
341 sizeof(struct ifinfomsg));
/* Replies matching sn are parsed by mlx5_nl_mac_addr_cb() into data. */
344 ret = mlx5_nl_recv(fd, sn, mlx5_nl_mac_addr_cb, &data);
/* Logged at DEBUG level with the text form of rte_errno. */
350 DRV_LOG(DEBUG, "port %u cannot retrieve MAC address list %s",
351 dev->data->port_id, strerror(rte_errno));
356 * Modify the MAC address neighbour table with Netlink.
359 * Pointer to Ethernet device.
361 * MAC address to consider.
363 * 1 to add the MAC address, 0 to remove the MAC address.
366 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Build and send an RTM_NEWNEIGH or RTM_DELNEIGH request (chosen by add)
 * carrying mac as an NDA_LLADDR attribute, then read the kernel reply.
 */
369 mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac,
372 struct priv *priv = dev->data->dev_private;
/* Interface index placed in the request's ndm_ifindex field below. */
373 int iface_idx = mlx5_ifindex(dev);
/*
 * NOTE(review): presumably the storage that RTA_DATA(&req.rta) points at
 * when the address is copied below - confirm against the full struct layout.
 */
378 uint8_t buffer[ETHER_ADDR_LEN];
/* Initial length covers the ndmsg only; the attribute length is added later. */
381 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
382 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
383 NLM_F_EXCL | NLM_F_ACK,
/* add selects the message type: nonzero creates, zero deletes. */
384 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
387 .ndm_family = PF_BRIDGE,
388 .ndm_state = NUD_NOARP | NUD_PERMANENT,
389 .ndm_ifindex = iface_idx,
390 .ndm_flags = NTF_SELF,
/* Single attribute: a link-layer address of ETHER_ADDR_LEN bytes. */
393 .rta_type = NDA_LLADDR,
394 .rta_len = RTA_LENGTH(ETHER_ADDR_LEN),
/* Sequence number for this exchange; the per-device counter is post-incremented. */
399 uint32_t sn = priv->nl_sn++;
/* A socket value of -1 is special-cased before any request is sent. */
401 if (priv->nl_socket == -1)
403 fd = priv->nl_socket;
/* Write the MAC bytes into the attribute payload. */
404 memcpy(RTA_DATA(&req.rta), mac, ETHER_ADDR_LEN);
/* Total length = aligned header-plus-ndmsg length + aligned attribute length. */
405 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
406 RTA_ALIGN(req.rta.rta_len);
407 ret = mlx5_nl_send(fd, &req.hdr, sn);
/* NLM_F_ACK was requested above; the reply is read without a callback. */
410 ret = mlx5_nl_recv(fd, sn, NULL, NULL);
416 "port %u cannot %s MAC address %02X:%02X:%02X:%02X:%02X:%02X"
419 add ? "add" : "remove",
420 mac->addr_bytes[0], mac->addr_bytes[1],
421 mac->addr_bytes[2], mac->addr_bytes[3],
422 mac->addr_bytes[4], mac->addr_bytes[5],
423 strerror(rte_errno));
431 * Pointer to Ethernet device.
433 * MAC address to register.
438 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Register mac with the kernel through mlx5_nl_mac_addr_modify() (add = 1)
 * and flag index in the priv->mac_own bitfield.
 */
441 mlx5_nl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
444 struct priv *priv = dev->data->dev_private;
447 ret = mlx5_nl_mac_addr_modify(dev, mac, 1);
/* mac_own is the bitfield mlx5_nl_mac_addr_flush() tests before removing an entry. */
449 BITFIELD_SET(priv->mac_own, index);
456 * Remove a MAC address.
459 * Pointer to Ethernet device.
461 * MAC address to remove.
466 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Clear the mac_own bit for index, then ask the kernel to delete mac through
 * mlx5_nl_mac_addr_modify() (add = 0); that call's result is returned as is.
 */
469 mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct ether_addr *mac,
472 struct priv *priv = dev->data->dev_private;
/* The ownership bit is reset before the kernel request is issued. */
474 BITFIELD_RESET(priv->mac_own, index);
475 return mlx5_nl_mac_addr_modify(dev, mac, 0);
479 * Synchronize Netlink bridge table to the internal table.
482 * Pointer to Ethernet device.
/*
 * Fetch the bridge MAC addresses with mlx5_nl_mac_addr_list() and copy those
 * not yet present in dev->data->mac_addrs into the first zeroed slot.
 */
485 mlx5_nl_mac_addr_sync(struct rte_eth_dev *dev)
/* Scratch table sized for the maximum number of addresses. */
487 struct ether_addr macs[MLX5_MAX_MAC_ADDRESSES];
/* macs_n receives the number of entries filled in macs. */
492 ret = mlx5_nl_mac_addr_list(dev, &macs, &macs_n);
495 for (i = 0; i != macs_n; ++i) {
498 /* Verify the address is not in the array yet. */
499 for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j)
500 if (is_same_ether_addr(&macs[i],
501 &dev->data->mac_addrs[j]))
/* j is below the table size only when the scan above ended early. */
503 if (j != MLX5_MAX_MAC_ADDRESSES)
505 /* Find the first entry available. */
506 for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j) {
/* An all-zero address marks a free slot. */
507 if (is_zero_ether_addr(&dev->data->mac_addrs[j])) {
508 dev->data->mac_addrs[j] = macs[i];
516 * Flush all added MAC addresses.
519 * Pointer to Ethernet device.
/*
 * Remove every MAC address whose index is flagged in priv->mac_own, using
 * mlx5_nl_mac_addr_remove().
 */
522 mlx5_nl_mac_addr_flush(struct rte_eth_dev *dev)
524 struct priv *priv = dev->data->dev_private;
/* Walk the table from the last slot down to slot 0. */
527 for (i = MLX5_MAX_MAC_ADDRESSES - 1; i >= 0; --i) {
528 struct ether_addr *m = &dev->data->mac_addrs[i];
/* Entries without their mac_own bit set are left untouched. */
530 if (BITFIELD_ISSET(priv->mac_own, i))
531 mlx5_nl_mac_addr_remove(dev, m, i);
536 * Enable promiscuous / all multicast mode through Netlink.
539 * Pointer to Ethernet device structure.
541 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
543 * Nonzero to enable, disable otherwise.
546 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Send an RTM_NEWLINK request for the device's interface with ifi_flags set
 * to flags when enable is nonzero and to 0 otherwise.
 */
549 mlx5_nl_device_flags(struct rte_eth_dev *dev, uint32_t flags, int enable)
551 struct priv *priv = dev->data->dev_private;
/* Interface index placed in the request's ifi_index field below. */
552 int iface_idx = mlx5_ifindex(dev);
555 struct ifinfomsg ifi;
/* Header length covers the Netlink header plus one struct ifinfomsg. */
558 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
559 .nlmsg_type = RTM_NEWLINK,
560 .nlmsg_flags = NLM_F_REQUEST,
/* Enabling passes the flag bits; disabling passes 0. */
563 .ifi_flags = enable ? flags : 0,
565 .ifi_index = iface_idx,
/* Only IFF_PROMISC and IFF_ALLMULTI may be present in flags. */
571 assert(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
/*
 * NOTE(review): this guard tests nl_socket < 0, whereas
 * mlx5_nl_mac_addr_list() and mlx5_nl_mac_addr_modify() compare it with -1.
 */
572 if (priv->nl_socket < 0)
574 fd = priv->nl_socket;
/* The per-device sequence counter is post-incremented in the call. */
575 ret = mlx5_nl_send(fd, &req.hdr, priv->nl_sn++);
582 * Enable promiscuous mode through Netlink.
585 * Pointer to Ethernet device structure.
587 * Nonzero to enable, disable otherwise.
590 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Thin wrapper: forwards to mlx5_nl_device_flags() with IFF_PROMISC and logs
 * a message containing the port id, the requested action and
 * strerror(rte_errno).
 */
593 mlx5_nl_promisc(struct rte_eth_dev *dev, int enable)
595 int ret = mlx5_nl_device_flags(dev, IFF_PROMISC, enable);
599 "port %u cannot %s promisc mode: Netlink error %s",
/* The action word follows enable: "enable" when nonzero, "disable" otherwise. */
600 dev->data->port_id, enable ? "enable" : "disable",
601 strerror(rte_errno));
606 * Enable all multicast mode through Netlink.
609 * Pointer to Ethernet device structure.
611 * Nonzero to enable, disable otherwise.
614 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Thin wrapper: forwards to mlx5_nl_device_flags() with IFF_ALLMULTI and logs
 * a message containing the port id, the requested action and
 * strerror(rte_errno).
 */
617 mlx5_nl_allmulti(struct rte_eth_dev *dev, int enable)
619 int ret = mlx5_nl_device_flags(dev, IFF_ALLMULTI, enable);
623 "port %u cannot %s allmulti mode: Netlink error %s",
/* The action word follows enable: "enable" when nonzero, "disable" otherwise. */
624 dev->data->port_id, enable ? "enable" : "disable",
625 strerror(rte_errno));