1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2017 6WIND S.A.
3 * Copyright 2017 Mellanox Technologies, Ltd
8 #include <linux/netlink.h>
10 #include <sys/socket.h>
14 #include <rte_malloc.h>
15 #include <tap_netlink.h>
16 #include <rte_random.h>
20 /* Must be quite large to support dumping a huge list of QDISC or filters. */
21 #define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
22 #define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
23 #define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
27 struct nested_tail *prev;
31 * Initialize a netlink socket for communicating with the kernel.
34 * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
35 * specific netlink multicast groups. Otherwise, no subscription will be made.
38 * netlink socket file descriptor on success, -1 otherwise.
41 tap_nl_init(uint32_t nl_groups)
43 int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
44 struct sockaddr_nl local = {
45 .nl_family = AF_NETLINK,
46 .nl_groups = nl_groups,
48 #ifdef NETLINK_EXT_ACK
52 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
54 TAP_LOG(ERR, "Unable to create a netlink socket");
57 if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
58 TAP_LOG(ERR, "Unable to set socket buffer send size");
62 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
63 TAP_LOG(ERR, "Unable to set socket buffer receive size");
68 #ifdef NETLINK_EXT_ACK
69 /* Ask for an extended ACK response; older kernels ignore the request. */
70 setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));
73 if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
74 TAP_LOG(ERR, "Unable to bind to the netlink socket");
82 * Clean up a netlink socket once all communication with the kernel is finished.
85 * The netlink socket file descriptor used for communication.
88 * 0 on success, -1 otherwise.
91 tap_nl_final(int nlsk_fd)
94 TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)",
95 strerror(errno), errno);
102 * Send a message to the kernel on the netlink socket.
105 * The netlink socket file descriptor used for communication.
107 * The netlink message to send to the kernel.
110 * the number of sent bytes on success, -1 otherwise.
113 tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
117 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
118 nh->nlmsg_seq = (uint32_t)rte_rand();
121 send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0);
122 if (send_bytes < 0) {
126 TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
127 strerror(errno), errno);
133 #ifdef NETLINK_EXT_ACK
134 static const struct nlattr *
135 tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset)
137 return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset));
140 static const struct nlattr *
141 tap_nl_attr_next(const struct nlattr *attr)
143 return (const struct nlattr *)((const char *)attr
144 + NLMSG_ALIGN(attr->nla_len));
148 tap_nl_attr_ok(const struct nlattr *attr, int len)
150 if (len < (int)sizeof(struct nlattr))
151 return false; /* missing header */
152 if (attr->nla_len < sizeof(struct nlattr))
153 return false; /* attribute length should include itself */
154 if ((int)attr->nla_len > len)
155 return false; /* attribute is truncated */
160 /* Decode extended errors from kernel */
162 tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err)
164 const struct nlattr *attr;
165 const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len);
166 size_t hlen = sizeof(*err);
168 /* no TLVs, no extended response */
169 if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS))
172 if (!(nh->nlmsg_flags & NLM_F_CAPPED))
173 hlen += err->msg.nlmsg_len - NLMSG_HDRLEN;
175 for (attr = tap_nl_attr_first(nh, hlen);
176 tap_nl_attr_ok(attr, tail - (const char *)attr);
177 attr = tap_nl_attr_next(attr)) {
178 uint16_t type = attr->nla_type & NLA_TYPE_MASK;
180 if (type == NLMSGERR_ATTR_MSG) {
181 const char *msg = (const char *)attr
182 + NLMSG_ALIGN(sizeof(*attr));
185 TAP_LOG(ERR, "%s", msg);
188 TAP_LOG(WARNING, "%s", msg);
195 * Extended ACK support was added in Linux kernel 4.17;
196 * on older kernels, just ignore that part of the message.
198 #define tap_nl_dump_ext_ack(nh, err) do { } while (0)
202 * Check that the kernel sends an appropriate ACK in response
203 * to a tap_nl_send().
206 * The netlink socket file descriptor used for communication.
209 * 0 on success, -1 otherwise with errno set.
212 tap_nl_recv_ack(int nlsk_fd)
214 return tap_nl_recv(nlsk_fd, NULL, NULL);
218 * Receive a message from the kernel on the netlink socket, following an
222 * The netlink socket file descriptor used for communication.
224 * The callback function to call for each netlink message received.
225 * @param[in, out] arg
226 * Custom arguments for the callback.
229 * 0 on success, -1 otherwise with errno set.
232 tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
243 recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0);
244 if (recv_bytes < 0) {
250 for (nh = (struct nlmsghdr *)buf;
251 NLMSG_OK(nh, (unsigned int)recv_bytes);
252 nh = NLMSG_NEXT(nh, recv_bytes)) {
253 if (nh->nlmsg_type == NLMSG_ERROR) {
254 struct nlmsgerr *err_data = NLMSG_DATA(nh);
256 tap_nl_dump_ext_ack(nh, err_data);
257 if (err_data->error < 0) {
258 errno = -err_data->error;
264 /* Multi-part msgs and their trailing DONE message. */
265 if (nh->nlmsg_flags & NLM_F_MULTI) {
266 if (nh->nlmsg_type == NLMSG_DONE)
278 * Append a netlink attribute to a message.
281 * The netlink message to append the attribute to.
283 * The type of attribute to append.
284 * @param[in] data_len
285 * The length of the data to append.
287 * The data to append.
290 tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
291 unsigned int data_len, const void *data)
293 /* see man 3 rtnetlink */
296 rta = (struct rtattr *)NLMSG_TAIL(nh);
297 rta->rta_len = RTA_LENGTH(data_len);
298 rta->rta_type = type;
299 memcpy(RTA_DATA(rta), data, data_len);
300 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
304 * Append a uint8_t netlink attribute to a message.
307 * The netlink message to append the attribute to.
309 * The type of attribute to append.
311 * The data to append.
314 tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
316 tap_nlattr_add(nh, type, sizeof(uint8_t), &data);
320 * Append a uint16_t netlink attribute to a message.
323 * The netlink message to append the attribute to.
325 * The type of attribute to append.
327 * The data to append.
330 tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
332 tap_nlattr_add(nh, type, sizeof(uint16_t), &data);
336 * Append a uint32_t netlink attribute to a message.
339 * The netlink message to append the attribute to.
341 * The type of attribute to append.
343 * The data to append.
346 tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
348 tap_nlattr_add(nh, type, sizeof(uint32_t), &data);
352 * Start a nested netlink attribute.
353 * It must be followed later by a call to tap_nlattr_nested_finish().
355 * @param[in, out] msg
356 * The netlink message whose nested_tails metadata is updated.
358 * The nested attribute type to append.
361 * -1 if adding a nested netlink attribute failed, 0 otherwise.
364 tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type)
366 struct nested_tail *tail;
368 tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
371 "Couldn't allocate memory for nested netlink attribute");
375 tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
377 tap_nlattr_add(&msg->nh, type, 0, NULL);
379 tail->prev = msg->nested_tails;
381 msg->nested_tails = tail;
387 * End a nested netlink attribute.
388 * It follows a call to tap_nlattr_nested_start().
389 * In effect, it will modify the nested attribute length to include every byte
390 * from the nested attribute start, up to here.
392 * @param[in, out] msg
393 * The netlink message whose nested_tails metadata is updated.
396 tap_nlattr_nested_finish(struct nlmsg *msg)
398 struct nested_tail *tail = msg->nested_tails;
400 tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
403 msg->nested_tails = tail->prev;