X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ftap%2Ftap_netlink.c;h=75af3404b03593fceeed140b60ababf3a490f99d;hb=a8354c99a912e1c2513e113ae07c9e91237f7951;hp=6de896ab17b67824cc982fa585544f0af612dd7b;hpb=c0bddd3a057f1ef34d2d5aa69e7338caa82de2bf;p=dpdk.git diff --git a/drivers/net/tap/tap_netlink.c b/drivers/net/tap/tap_netlink.c index 6de896ab17..75af3404b0 100644 --- a/drivers/net/tap/tap_netlink.c +++ b/drivers/net/tap/tap_netlink.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd */ #include @@ -37,11 +9,19 @@ #include #include #include +#include #include #include #include +#include "tap_log.h" + +/* Compatibility with glibc < 2.24 */ +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + /* Must be quite large to support dumping a huge list of QDISC or filters. */ #define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */ #define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */ @@ -63,29 +43,41 @@ struct nested_tail { * netlink socket file descriptor on success, -1 otherwise. */ int -nl_init(uint32_t nl_groups) +tap_nl_init(uint32_t nl_groups) { int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE; struct sockaddr_nl local = { .nl_family = AF_NETLINK, .nl_groups = nl_groups, }; +#ifdef NETLINK_EXT_ACK + int one = 1; +#endif fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (fd < 0) { - RTE_LOG(ERR, PMD, "Unable to create a netlink socket\n"); + TAP_LOG(ERR, "Unable to create a netlink socket"); return -1; } if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { - RTE_LOG(ERR, PMD, "Unable to set socket buffer send size\n"); + TAP_LOG(ERR, "Unable to set socket buffer send size"); + close(fd); return -1; } if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { - RTE_LOG(ERR, PMD, "Unable to set socket buffer receive size\n"); + TAP_LOG(ERR, "Unable to set socket buffer receive size"); + close(fd); return -1; } + +#ifdef NETLINK_EXT_ACK + /* Ask for extended ACK response. on older kernel will ignore request. */ + setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)); +#endif + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { - RTE_LOG(ERR, PMD, "Unable to bind to the netlink socket\n"); + TAP_LOG(ERR, "Unable to bind to the netlink socket"); + close(fd); return -1; } return fd; @@ -101,10 +93,10 @@ nl_init(uint32_t nl_groups) * 0 on success, -1 otherwise. */ int -nl_final(int nlsk_fd) +tap_nl_final(int nlsk_fd) { if (close(nlsk_fd)) { - RTE_LOG(ERR, PMD, "Failed to close netlink socket: %s (%d)\n", + TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)", strerror(errno), errno); return -1; } @@ -123,53 +115,113 @@ nl_final(int nlsk_fd) * the number of sent bytes on success, -1 otherwise. */ int -nl_send(int nlsk_fd, struct nlmsghdr *nh) +tap_nl_send(int nlsk_fd, struct nlmsghdr *nh) { - /* man 7 netlink EXAMPLE */ - struct sockaddr_nl sa = { - .nl_family = AF_NETLINK, - }; - struct iovec iov = { - .iov_base = nh, - .iov_len = nh->nlmsg_len, - }; - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - }; int send_bytes; nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */ nh->nlmsg_seq = (uint32_t)rte_rand(); - send_bytes = sendmsg(nlsk_fd, &msg, 0); + +retry: + send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0); if (send_bytes < 0) { - RTE_LOG(ERR, PMD, "Failed to send netlink message: %s (%d)\n", + if (errno == EINTR) + goto retry; + + TAP_LOG(ERR, "Failed to send netlink message: %s (%d)", strerror(errno), errno); return -1; } return send_bytes; } +#ifdef NETLINK_EXT_ACK +static const struct nlattr * +tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset) +{ + return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset)); +} + +static const struct nlattr * +tap_nl_attr_next(const struct nlattr *attr) +{ + return (const struct nlattr *)((const char *)attr + + NLMSG_ALIGN(attr->nla_len)); +} + +static bool +tap_nl_attr_ok(const struct nlattr *attr, int len) +{ + if (len < (int)sizeof(struct nlattr)) + return false; /* missing header */ + if (attr->nla_len < sizeof(struct nlattr)) + return false; /* attribute length should include itself */ + if ((int)attr->nla_len > len) + return false; /* attribute is truncated */ + return true; +} + + +/* Decode extended errors from kernel */ +static void +tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err) +{ + const struct nlattr *attr; + const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len); + size_t hlen = sizeof(*err); + + /* no TLVs, no extended response */ + if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS)) + return; + + if (!(nh->nlmsg_flags & NLM_F_CAPPED)) + hlen += err->msg.nlmsg_len - NLMSG_HDRLEN; + + for (attr = tap_nl_attr_first(nh, hlen); + tap_nl_attr_ok(attr, tail - (const char *)attr); + attr = tap_nl_attr_next(attr)) { + uint16_t type = attr->nla_type & NLA_TYPE_MASK; + + if (type == NLMSGERR_ATTR_MSG) { + const char *msg = (const char *)attr + + NLMSG_ALIGN(sizeof(*attr)); + + if (err->error) + TAP_LOG(ERR, "%s", msg); + else + + TAP_LOG(WARNING, "%s", msg); + break; + } + } +} +#else +/* + * External ACK support was added in Linux kernel 4.17 + * on older kernels, just ignore that part of message + */ +#define tap_nl_dump_ext_ack(nh, err) do { } while (0) +#endif + /** - * Check that the kernel sends an appropriate ACK in response to an nl_send(). + * Check that the kernel sends an appropriate ACK in response + * to an tap_nl_send(). * * @param[in] nlsk_fd * The netlink socket file descriptor used for communication. * * @return - * 0 on success, -1 otherwise. + * 0 on success, -1 otherwise with errno set. */ int -nl_recv_ack(int nlsk_fd) +tap_nl_recv_ack(int nlsk_fd) { - return nl_recv(nlsk_fd, NULL, NULL); + return tap_nl_recv(nlsk_fd, NULL, NULL); } /** * Receive a message from the kernel on the netlink socket, following an - * nl_send(). + * tap_nl_send(). * * @param[in] nlsk_fd * The netlink socket file descriptor used for communication. @@ -179,66 +231,52 @@ nl_recv_ack(int nlsk_fd) * Custom arguments for the callback. * * @return - * 0 on success, -1 otherwise. + * 0 on success, -1 otherwise with errno set. */ int -nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg) +tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg) { - /* man 7 netlink EXAMPLE */ - struct sockaddr_nl sa; - struct nlmsghdr *nh; char buf[BUF_SIZE]; - struct iovec iov = { - .iov_base = buf, - .iov_len = sizeof(buf), - }; - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - }; - int recv_bytes = 0, done = 0, multipart = 0, error = 0; + int multipart = 0; + int ret = 0; -read: - recv_bytes = recvmsg(nlsk_fd, &msg, 0); - if (recv_bytes < 0) - return -1; - for (nh = (struct nlmsghdr *)buf; - NLMSG_OK(nh, (unsigned int)recv_bytes); - nh = NLMSG_NEXT(nh, recv_bytes)) { - /* - * Multi-part messages and their following DONE message have the - * NLM_F_MULTI flag set. Make note, in order to read the DONE - * message afterwards. - */ - if (nh->nlmsg_flags & NLM_F_MULTI) - multipart = 1; - if (nh->nlmsg_type == NLMSG_ERROR) { - struct nlmsgerr *err_data = NLMSG_DATA(nh); - - if (err_data->error == 0) - RTE_LOG(DEBUG, PMD, "%s() ack message recvd\n", - __func__); - else { - RTE_LOG(DEBUG, PMD, - "%s() error message recvd\n", __func__); - error = 1; + do { + struct nlmsghdr *nh; + int recv_bytes; + +retry: + recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0); + if (recv_bytes < 0) { + if (errno == EINTR) + goto retry; + return -1; + } + + for (nh = (struct nlmsghdr *)buf; + NLMSG_OK(nh, (unsigned int)recv_bytes); + nh = NLMSG_NEXT(nh, recv_bytes)) { + if (nh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err_data = NLMSG_DATA(nh); + + tap_nl_dump_ext_ack(nh, err_data); + if (err_data->error < 0) { + errno = -err_data->error; + return -1; + } + /* Ack message. */ + return 0; + } + /* Multi-part msgs and their trailing DONE message. */ + if (nh->nlmsg_flags & NLM_F_MULTI) { + if (nh->nlmsg_type == NLMSG_DONE) + return 0; + multipart = 1; } + if (cb) + ret = cb(nh, arg); } - /* The end of multipart message. */ - if (nh->nlmsg_type == NLMSG_DONE) - /* No need to call the callback for a DONE message. */ - done = 1; - else if (cb) - if (cb(nh, arg) < 0) - error = 1; - } - if (multipart && !done) - goto read; - if (error) - return -1; - return 0; + } while (multipart); + return ret; } /** @@ -254,7 +292,7 @@ read: * The data to append. */ void -nlattr_add(struct nlmsghdr *nh, unsigned short type, +tap_nlattr_add(struct nlmsghdr *nh, unsigned short type, unsigned int data_len, const void *data) { /* see man 3 rtnetlink */ @@ -278,9 +316,9 @@ nlattr_add(struct nlmsghdr *nh, unsigned short type, * The data to append. */ void -nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) +tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) { - nlattr_add(nh, type, sizeof(uint8_t), &data); + tap_nlattr_add(nh, type, sizeof(uint8_t), &data); } /** @@ -294,9 +332,9 @@ nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data) * The data to append. */ void -nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) +tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) { - nlattr_add(nh, type, sizeof(uint16_t), &data); + tap_nlattr_add(nh, type, sizeof(uint16_t), &data); } /** @@ -310,14 +348,14 @@ nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data) * The data to append. */ void -nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) +tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) { - nlattr_add(nh, type, sizeof(uint32_t), &data); + tap_nlattr_add(nh, type, sizeof(uint32_t), &data); } /** * Start a nested netlink attribute. - * It must be followed later by a call to nlattr_nested_finish(). + * It must be followed later by a call to tap_nlattr_nested_finish(). * * @param[in, out] msg * The netlink message where to edit the nested_tails metadata. @@ -328,21 +366,20 @@ nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data) * -1 if adding a nested netlink attribute failed, 0 otherwise. */ int -nlattr_nested_start(struct nlmsg *msg, uint16_t type) +tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type) { struct nested_tail *tail; tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0); if (!tail) { - RTE_LOG(ERR, PMD, - "Couldn't allocate memory for nested netlink" - " attribute\n"); + TAP_LOG(ERR, + "Couldn't allocate memory for nested netlink attribute"); return -1; } tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh); - nlattr_add(&msg->nh, type, 0, NULL); + tap_nlattr_add(&msg->nh, type, 0, NULL); tail->prev = msg->nested_tails; @@ -353,7 +390,7 @@ nlattr_nested_start(struct nlmsg *msg, uint16_t type) /** * End a nested netlink attribute. - * It follows a call to nlattr_nested_start(). + * It follows a call to tap_nlattr_nested_start(). * In effect, it will modify the nested attribute length to include every bytes * from the nested attribute start, up to here. * @@ -361,7 +398,7 @@ nlattr_nested_start(struct nlmsg *msg, uint16_t type) * The netlink message where to edit the nested_tails metadata. */ void -nlattr_nested_finish(struct nlmsg *msg) +tap_nlattr_nested_finish(struct nlmsg *msg) { struct nested_tail *tail = msg->nested_tails;