-/*-
- * BSD LICENSE
- *
- * Copyright 2017 6WIND S.A.
- * Copyright 2017 Mellanox.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
+#include <stdbool.h>
#include <rte_malloc.h>
#include <tap_netlink.h>
#include <rte_random.h>
+#include "tap_log.h"
+
+/* Compatibility with glibc < 2.24 */
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
/* Must be quite large to support dumping a huge list of QDISC or filters. */
#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
* netlink socket file descriptor on success, -1 otherwise.
*/
int
-nl_init(uint32_t nl_groups)
+tap_nl_init(uint32_t nl_groups)
{
int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
struct sockaddr_nl local = {
.nl_family = AF_NETLINK,
.nl_groups = nl_groups,
};
+#ifdef NETLINK_EXT_ACK
+ int one = 1;
+#endif
fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
if (fd < 0) {
- RTE_LOG(ERR, PMD, "Unable to create a netlink socket\n");
+ TAP_LOG(ERR, "Unable to create a netlink socket");
return -1;
}
if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
- RTE_LOG(ERR, PMD, "Unable to set socket buffer send size\n");
+ TAP_LOG(ERR, "Unable to set socket buffer send size");
+ close(fd);
return -1;
}
if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
- RTE_LOG(ERR, PMD, "Unable to set socket buffer receive size\n");
+ TAP_LOG(ERR, "Unable to set socket buffer receive size");
+ close(fd);
return -1;
}
+
+#ifdef NETLINK_EXT_ACK
+ /* Ask for extended ACK response. on older kernel will ignore request. */
+ setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));
+#endif
+
if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
- RTE_LOG(ERR, PMD, "Unable to bind to the netlink socket\n");
+ TAP_LOG(ERR, "Unable to bind to the netlink socket");
+ close(fd);
return -1;
}
return fd;
* 0 on success, -1 otherwise.
*/
int
-nl_final(int nlsk_fd)
+tap_nl_final(int nlsk_fd)
{
if (close(nlsk_fd)) {
- RTE_LOG(ERR, PMD, "Failed to close netlink socket: %s (%d)\n",
+ TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)",
strerror(errno), errno);
return -1;
}
* the number of sent bytes on success, -1 otherwise.
*/
int
-nl_send(int nlsk_fd, struct nlmsghdr *nh)
+tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
{
- /* man 7 netlink EXAMPLE */
- struct sockaddr_nl sa = {
- .nl_family = AF_NETLINK,
- };
- struct iovec iov = {
- .iov_base = nh,
- .iov_len = nh->nlmsg_len,
- };
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
int send_bytes;
nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
nh->nlmsg_seq = (uint32_t)rte_rand();
- send_bytes = sendmsg(nlsk_fd, &msg, 0);
+
+retry:
+ send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0);
if (send_bytes < 0) {
- RTE_LOG(ERR, PMD, "Failed to send netlink message: %s (%d)\n",
+ if (errno == EINTR)
+ goto retry;
+
+ TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
strerror(errno), errno);
return -1;
}
return send_bytes;
}
+#ifdef NETLINK_EXT_ACK
+static const struct nlattr *
+tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset)
+{
+ return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset));
+}
+
+static const struct nlattr *
+tap_nl_attr_next(const struct nlattr *attr)
+{
+ return (const struct nlattr *)((const char *)attr
+ + NLMSG_ALIGN(attr->nla_len));
+}
+
+static bool
+tap_nl_attr_ok(const struct nlattr *attr, int len)
+{
+ if (len < (int)sizeof(struct nlattr))
+ return false; /* missing header */
+ if (attr->nla_len < sizeof(struct nlattr))
+ return false; /* attribute length should include itself */
+ if ((int)attr->nla_len > len)
+ return false; /* attribute is truncated */
+ return true;
+}
+
+
+/* Decode extended errors from kernel */
+static void
+tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err)
+{
+ const struct nlattr *attr;
+ const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len);
+ size_t hlen = sizeof(*err);
+
+ /* no TLVs, no extended response */
+ if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return;
+
+ if (!(nh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += err->msg.nlmsg_len - NLMSG_HDRLEN;
+
+ for (attr = tap_nl_attr_first(nh, hlen);
+ tap_nl_attr_ok(attr, tail - (const char *)attr);
+ attr = tap_nl_attr_next(attr)) {
+ uint16_t type = attr->nla_type & NLA_TYPE_MASK;
+
+ if (type == NLMSGERR_ATTR_MSG) {
+ const char *msg = (const char *)attr
+ + NLMSG_ALIGN(sizeof(*attr));
+
+ if (err->error)
+ TAP_LOG(ERR, "%s", msg);
+ else
+
+ TAP_LOG(WARNING, "%s", msg);
+ break;
+ }
+ }
+}
+#else
+/*
+ * External ACK support was added in Linux kernel 4.17
+ * on older kernels, just ignore that part of message
+ */
+#define tap_nl_dump_ext_ack(nh, err) do { } while (0)
+#endif
+
/**
- * Check that the kernel sends an appropriate ACK in response to an nl_send().
+ * Check that the kernel sends an appropriate ACK in response
+ * to an tap_nl_send().
*
* @param[in] nlsk_fd
* The netlink socket file descriptor used for communication.
*
* @return
- * 0 on success, -1 otherwise.
+ * 0 on success, -1 otherwise with errno set.
*/
int
-nl_recv_ack(int nlsk_fd)
+tap_nl_recv_ack(int nlsk_fd)
{
- return nl_recv(nlsk_fd, NULL, NULL);
+ return tap_nl_recv(nlsk_fd, NULL, NULL);
}
/**
* Receive a message from the kernel on the netlink socket, following an
- * nl_send().
+ * tap_nl_send().
*
* @param[in] nlsk_fd
* The netlink socket file descriptor used for communication.
* Custom arguments for the callback.
*
* @return
- * 0 on success, -1 otherwise.
+ * 0 on success, -1 otherwise with errno set.
*/
int
-nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
+tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
{
- /* man 7 netlink EXAMPLE */
- struct sockaddr_nl sa;
- struct nlmsghdr *nh;
char buf[BUF_SIZE];
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = sizeof(buf),
- };
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
- int recv_bytes = 0, done = 0, multipart = 0, error = 0;
+ int multipart = 0;
+ int ret = 0;
-read:
- recv_bytes = recvmsg(nlsk_fd, &msg, 0);
- if (recv_bytes < 0)
- return -1;
- for (nh = (struct nlmsghdr *)buf;
- NLMSG_OK(nh, (unsigned int)recv_bytes);
- nh = NLMSG_NEXT(nh, recv_bytes)) {
- /*
- * Multi-part messages and their following DONE message have the
- * NLM_F_MULTI flag set. Make note, in order to read the DONE
- * message afterwards.
- */
- if (nh->nlmsg_flags & NLM_F_MULTI)
- multipart = 1;
- if (nh->nlmsg_type == NLMSG_ERROR) {
- struct nlmsgerr *err_data = NLMSG_DATA(nh);
-
- if (err_data->error == 0)
- RTE_LOG(DEBUG, PMD, "%s() ack message recvd\n",
- __func__);
- else {
- RTE_LOG(DEBUG, PMD,
- "%s() error message recvd\n", __func__);
- error = 1;
+ do {
+ struct nlmsghdr *nh;
+ int recv_bytes;
+
+retry:
+ recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0);
+ if (recv_bytes < 0) {
+ if (errno == EINTR)
+ goto retry;
+ return -1;
+ }
+
+ for (nh = (struct nlmsghdr *)buf;
+ NLMSG_OK(nh, (unsigned int)recv_bytes);
+ nh = NLMSG_NEXT(nh, recv_bytes)) {
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err_data = NLMSG_DATA(nh);
+
+ tap_nl_dump_ext_ack(nh, err_data);
+ if (err_data->error < 0) {
+ errno = -err_data->error;
+ return -1;
+ }
+ /* Ack message. */
+ return 0;
+ }
+ /* Multi-part msgs and their trailing DONE message. */
+ if (nh->nlmsg_flags & NLM_F_MULTI) {
+ if (nh->nlmsg_type == NLMSG_DONE)
+ return 0;
+ multipart = 1;
}
+ if (cb)
+ ret = cb(nh, arg);
}
- /* The end of multipart message. */
- if (nh->nlmsg_type == NLMSG_DONE)
- /* No need to call the callback for a DONE message. */
- done = 1;
- else if (cb)
- if (cb(nh, arg) < 0)
- error = 1;
- }
- if (multipart && !done)
- goto read;
- if (error)
- return -1;
- return 0;
+ } while (multipart);
+ return ret;
}
/**
* The data to append.
*/
void
-nlattr_add(struct nlmsghdr *nh, unsigned short type,
+tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
unsigned int data_len, const void *data)
{
/* see man 3 rtnetlink */
* The data to append.
*/
void
-nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
+tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
{
- nlattr_add(nh, type, sizeof(uint8_t), &data);
+ tap_nlattr_add(nh, type, sizeof(uint8_t), &data);
}
/**
* The data to append.
*/
void
-nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
+tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
{
- nlattr_add(nh, type, sizeof(uint16_t), &data);
+ tap_nlattr_add(nh, type, sizeof(uint16_t), &data);
}
/**
* The data to append.
*/
void
-nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
+tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
{
- nlattr_add(nh, type, sizeof(uint32_t), &data);
+ tap_nlattr_add(nh, type, sizeof(uint32_t), &data);
}
/**
* Start a nested netlink attribute.
- * It must be followed later by a call to nlattr_nested_finish().
+ * It must be followed later by a call to tap_nlattr_nested_finish().
*
* @param[in, out] msg
* The netlink message where to edit the nested_tails metadata.
* -1 if adding a nested netlink attribute failed, 0 otherwise.
*/
int
-nlattr_nested_start(struct nlmsg *msg, uint16_t type)
+tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type)
{
struct nested_tail *tail;
tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
if (!tail) {
- RTE_LOG(ERR, PMD,
- "Couldn't allocate memory for nested netlink"
- " attribute\n");
+ TAP_LOG(ERR,
+ "Couldn't allocate memory for nested netlink attribute");
return -1;
}
tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
- nlattr_add(&msg->nh, type, 0, NULL);
+ tap_nlattr_add(&msg->nh, type, 0, NULL);
tail->prev = msg->nested_tails;
/**
* End a nested netlink attribute.
- * It follows a call to nlattr_nested_start().
+ * It follows a call to tap_nlattr_nested_start().
* In effect, it will modify the nested attribute length to include every bytes
* from the nested attribute start, up to here.
*
* The netlink message where to edit the nested_tails metadata.
*/
void
-nlattr_nested_finish(struct nlmsg *msg)
+tap_nlattr_nested_finish(struct nlmsg *msg)
{
struct nested_tail *tail = msg->nested_tails;