#
SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_netlink.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_tcmsgs.c
# this lib depends upon:
DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <rte_malloc.h>
+#include <tap_netlink.h>
+#include <rte_random.h>
+
+/*
+ * Sizes used by the netlink helpers in this file.
+ * BUF_SIZE is the size of the local receive buffer declared in nl_recv();
+ * SNDBUF_SIZE and RCVBUF_SIZE are applied to the socket by nl_init().
+ */
+/* Must be quite large to support dumping a huge list of QDISC or filters. */
+#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
+#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
+#define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
+
+/*
+ * Bookkeeping entry for one nested attribute that has been started with
+ * nlattr_nested_start() and not yet closed by nlattr_nested_finish().
+ * Entries are chained through "prev" and headed by nlmsg.nested_tails.
+ */
+struct nested_tail {
+ /* Position of the nested attribute header inside the message. */
+ struct rtattr *tail;
+ /* Value msg->nested_tails held when this entry was pushed. */
+ struct nested_tail *prev;
+};
+
+/**
+ * Initialize a netlink socket for communicating with the kernel.
+ *
+ * The socket is a NETLINK_ROUTE raw socket created with SOCK_CLOEXEC. Its
+ * send and receive buffers are set to SNDBUF_SIZE and RCVBUF_SIZE, and it is
+ * bound to an AF_NETLINK address. If any step after the socket creation
+ * fails, the descriptor is closed before returning so that it is not leaked.
+ *
+ * @return
+ *   netlink socket file descriptor on success, -1 otherwise.
+ */
+int
+nl_init(void)
+{
+	int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
+	struct sockaddr_nl local = { .nl_family = AF_NETLINK };
+
+	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create a netlink socket\n");
+		return -1;
+	}
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
+		RTE_LOG(ERR, PMD, "Unable to set socket buffer send size\n");
+		goto error;
+	}
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
+		RTE_LOG(ERR, PMD, "Unable to set socket buffer receive size\n");
+		goto error;
+	}
+	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to bind to the netlink socket\n");
+		goto error;
+	}
+	return fd;
+error:
+	/* The socket was created but could not be configured: release it. */
+	close(fd);
+	return -1;
+}
+
+/**
+ * Release a netlink socket once communication with the kernel is over.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor to close.
+ *
+ * @return
+ *   0 if close() succeeded, -1 (after logging errno) otherwise.
+ */
+int
+nl_final(int nlsk_fd)
+{
+	int ret = close(nlsk_fd);
+
+	if (ret == 0)
+		return 0;
+	RTE_LOG(ERR, PMD, "Failed to close netlink socket: %s (%d)\n",
+		strerror(errno), errno);
+	return -1;
+}
+
+/**
+ * Send one netlink message to the kernel.
+ *
+ * Before sending, the message header is stamped with nlmsg_pid 0 and a
+ * sequence number taken from rte_rand().
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in, out] nh
+ *   The netlink message to send; nh->nlmsg_len bytes are transmitted.
+ *
+ * @return
+ *   the number of sent bytes on success, -1 otherwise.
+ */
+int
+nl_send(int nlsk_fd, struct nlmsghdr *nh)
+{
+	/* man 7 netlink EXAMPLE */
+	struct sockaddr_nl kernel_addr = { .nl_family = AF_NETLINK };
+	struct iovec vec;
+	struct msghdr hdr;
+	int sent;
+
+	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
+	nh->nlmsg_seq = (uint32_t)rte_rand();
+
+	vec.iov_base = nh;
+	vec.iov_len = nh->nlmsg_len;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = &kernel_addr;
+	hdr.msg_namelen = sizeof(kernel_addr);
+	hdr.msg_iov = &vec;
+	hdr.msg_iovlen = 1;
+
+	sent = sendmsg(nlsk_fd, &hdr, 0);
+	if (sent >= 0)
+		return sent;
+	RTE_LOG(ERR, PMD, "Failed to send netlink message: %s (%d)\n",
+		strerror(errno), errno);
+	return -1;
+}
+
+/**
+ * Wait for the kernel's answer to a previous nl_send() and report whether it
+ * was a positive acknowledgment.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+int
+nl_recv_ack(int nlsk_fd)
+{
+	int ret;
+
+	/* No per-message callback: only the status from nl_recv() matters. */
+	ret = nl_recv(nlsk_fd, NULL, NULL);
+	return ret;
+}
+
+/**
+ * Receive a message from the kernel on the netlink socket, following an
+ * nl_send().
+ *
+ * Every message found in the received datagram is inspected:
+ * - NLMSG_ERROR messages carry either an ACK (error == 0) or a failure. They
+ *   are handled here and are NOT forwarded to the callback, because their
+ *   payload is a struct nlmsgerr and not the payload the callback expects.
+ * - NLMSG_DONE terminates a multi-part answer and is not forwarded either.
+ * - Any other message is handed to the callback, when one is provided.
+ *
+ * When a message carries NLM_F_MULTI, reading continues until NLMSG_DONE has
+ * been seen.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] cb
+ *   The callback function to call for each data message received (may be
+ *   NULL).
+ * @param[in, out] arg
+ *   Custom arguments for the callback.
+ *
+ * @return
+ *   0 on success, -1 if recvmsg() failed, if the kernel reported an error, or
+ *   if the callback returned a negative value for any message.
+ */
+int
+nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
+{
+	/* man 7 netlink EXAMPLE */
+	struct sockaddr_nl sa;
+	struct nlmsghdr *nh;
+	char buf[BUF_SIZE];
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = sizeof(buf),
+	};
+	struct msghdr msg = {
+		.msg_name = &sa,
+		.msg_namelen = sizeof(sa),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	int recv_bytes = 0, done = 0, multipart = 0, error = 0;
+
+	do {
+		/* recvmsg() overwrites msg_namelen: restore it for each read. */
+		msg.msg_namelen = sizeof(sa);
+		recv_bytes = recvmsg(nlsk_fd, &msg, 0);
+		if (recv_bytes < 0)
+			return -1;
+		for (nh = (struct nlmsghdr *)buf;
+		     NLMSG_OK(nh, (unsigned int)recv_bytes);
+		     nh = NLMSG_NEXT(nh, recv_bytes)) {
+			/*
+			 * Multi-part messages and their following DONE message
+			 * have the NLM_F_MULTI flag set. Make note, in order to
+			 * read the DONE message afterwards.
+			 */
+			if (nh->nlmsg_flags & NLM_F_MULTI)
+				multipart = 1;
+			if (nh->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *err_data = NLMSG_DATA(nh);
+
+				if (nh->nlmsg_len <
+				    NLMSG_LENGTH(sizeof(*err_data))) {
+					/* Too short to hold a nlmsgerr. */
+					RTE_LOG(DEBUG, PMD,
+						"%s() truncated error message recvd\n",
+						__func__);
+					error = 1;
+				} else if (err_data->error == 0) {
+					RTE_LOG(DEBUG, PMD,
+						"%s() ack message recvd\n",
+						__func__);
+				} else {
+					RTE_LOG(DEBUG, PMD,
+						"%s() error message recvd\n",
+						__func__);
+					error = 1;
+				}
+				/* Status messages never reach the callback. */
+				continue;
+			}
+			/* The end of multipart message. */
+			if (nh->nlmsg_type == NLMSG_DONE) {
+				/* No need to call the callback for DONE. */
+				done = 1;
+				continue;
+			}
+			if (cb && cb(nh, arg) < 0)
+				error = 1;
+		}
+	} while (multipart && !done);
+	return error ? -1 : 0;
+}
+
+/**
+ * Append a netlink attribute to a message.
+ *
+ * The attribute header and payload are written at NLMSG_TAIL(nh) and
+ * nh->nlmsg_len is grown accordingly. No bounds check is performed here: the
+ * caller must make sure the buffer holding the message has enough room left.
+ *
+ * @param[in, out] nh
+ *   The netlink message being built, to which the attribute is appended.
+ * @param[in] type
+ *   The type of attribute to append.
+ * @param[in] data_len
+ *   The length of the data to append. May be 0 for an attribute without
+ *   payload (e.g. the header of a nested attribute).
+ * @param[in] data
+ *   The data to append. Not read when data_len is 0, so it may be NULL then.
+ */
+void
+nlattr_add(struct nlmsghdr *nh, unsigned short type,
+	   unsigned int data_len, const void *data)
+{
+	/* see man 3 rtnetlink */
+	struct rtattr *rta;
+
+	rta = (struct rtattr *)NLMSG_TAIL(nh);
+	rta->rta_len = RTA_LENGTH(data_len);
+	rta->rta_type = type;
+	/* memcpy() must not be handed a NULL pointer, even for 0 bytes. */
+	if (data_len > 0)
+		memcpy(RTA_DATA(rta), data, data_len);
+	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+}
+
+/**
+ * Append a one-byte (uint8_t) netlink attribute to a message.
+ *
+ * @param[in, out] nh
+ *   The netlink message being built, to which the attribute is appended.
+ * @param[in] type
+ *   The type of attribute to append.
+ * @param[in] data
+ *   The value to store as the attribute payload.
+ */
+void
+nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
+{
+	const uint8_t value = data;
+
+	nlattr_add(nh, type, sizeof(value), &value);
+}
+
+/**
+ * Append a two-byte (uint16_t) netlink attribute to a message.
+ *
+ * The value is copied as-is; no byte-order conversion is done here.
+ *
+ * @param[in, out] nh
+ *   The netlink message being built, to which the attribute is appended.
+ * @param[in] type
+ *   The type of attribute to append.
+ * @param[in] data
+ *   The value to store as the attribute payload.
+ */
+void
+nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
+{
+	const uint16_t value = data;
+
+	nlattr_add(nh, type, sizeof(value), &value);
+}
+
+/**
+ * Append a four-byte (uint32_t) netlink attribute to a message.
+ *
+ * The value is copied as-is; no byte-order conversion is done here.
+ *
+ * @param[in, out] nh
+ *   The netlink message being built, to which the attribute is appended.
+ * @param[in] type
+ *   The type of attribute to append.
+ * @param[in] data
+ *   The value to store as the attribute payload.
+ */
+void
+nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
+{
+	const uint32_t value = data;
+
+	nlattr_add(nh, type, sizeof(value), &value);
+}
+
+/**
+ * Open a nested netlink attribute.
+ * Each call must be matched later by a call to nlattr_nested_finish().
+ *
+ * An empty attribute of the requested type is appended to the message, and
+ * its position is pushed on the msg->nested_tails list so that its length can
+ * be patched when the nested attribute is closed.
+ *
+ * @param[in, out] msg
+ *   The netlink message where to edit the nested_tails metadata.
+ * @param[in] type
+ *   The nested attribute type to append.
+ *
+ * @return
+ *   -1 if the bookkeeping entry could not be allocated, 0 otherwise.
+ */
+int
+nlattr_nested_start(struct nlmsg *msg, uint16_t type)
+{
+	struct nested_tail *entry;
+
+	entry = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
+	if (entry == NULL) {
+		RTE_LOG(ERR, PMD,
+			"Couldn't allocate memory for nested netlink"
+			" attribute\n");
+		return -1;
+	}
+	/* Record where the nested attribute header is about to be written. */
+	entry->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
+	/* Push the entry in front of the ones already opened. */
+	entry->prev = msg->nested_tails;
+	msg->nested_tails = entry;
+	/* Emit the (for now empty) nested attribute header. */
+	nlattr_add(&msg->nh, type, 0, NULL);
+	return 0;
+}
+
+/**
+ * End a nested netlink attribute.
+ * It follows a call to nlattr_nested_start().
+ * In effect, it will modify the nested attribute length to include every byte
+ * from the nested attribute start, up to here.
+ *
+ * The bookkeeping entry of the attribute is popped from msg->nested_tails and
+ * freed. The list head is always updated, including when the outermost
+ * attribute is closed, so that it never keeps pointing to freed memory.
+ * Calling this function while no nested attribute is open is a no-op.
+ *
+ * @param[in, out] msg
+ *   The netlink message where to edit the nested_tails metadata.
+ */
+void
+nlattr_nested_finish(struct nlmsg *msg)
+{
+	struct nested_tail *tail = msg->nested_tails;
+
+	/* Nothing to close: no matching nlattr_nested_start(). */
+	if (tail == NULL)
+		return;
+
+	tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
+
+	/* Pop unconditionally; prev is NULL for the outermost attribute. */
+	msg->nested_tails = tail->prev;
+
+	rte_free(tail);
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TAP_NETLINK_H_
+#define _TAP_NETLINK_H_
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <linux/rtnetlink.h>
+#include <linux/netlink.h>
+#include <stdio.h>
+
+#include <rte_log.h>
+
+/* Size in bytes of the attribute area (buf) of struct nlmsg. */
+#define NLMSG_BUF 512
+
+/*
+ * A traffic-control netlink request together with the bookkeeping needed
+ * while it is being built.
+ */
+struct nlmsg {
+ /* Netlink message header; nlmsg_len tracks the current message size. */
+ struct nlmsghdr nh;
+ /* TC header, filled by tc_init_msg(). */
+ struct tcmsg t;
+ /*
+  * Room for the attributes appended by the nlattr_add*() helpers.
+  * NOTE(review): those helpers do not check this size -- confirm callers
+  * never exceed NLMSG_BUF bytes of attributes.
+  */
+ char buf[NLMSG_BUF];
+ /* Nested attributes opened by nlattr_nested_start() and not closed yet. */
+ struct nested_tail *nested_tails;
+};
+
+/*
+ * Address of the first byte following the (aligned) current end of the
+ * netlink message, i.e. where the next attribute gets written.
+ * The argument is evaluated twice: do not pass an expression with side
+ * effects.
+ */
+#define NLMSG_TAIL(nlh) (void *)((char *)(nlh) + NLMSG_ALIGN((nlh)->nlmsg_len))
+
+/* Netlink socket life cycle and message exchange with the kernel. */
+int nl_init(void);
+int nl_final(int nlsk_fd);
+int nl_send(int nlsk_fd, struct nlmsghdr *nh);
+int nl_recv(int nlsk_fd, int (*callback)(struct nlmsghdr *, void *), void *arg);
+int nl_recv_ack(int nlsk_fd);
+/* Helpers appending attributes at the tail of a message being built. */
+void nlattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned int data_len, const void *data);
+void nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data);
+void nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data);
+void nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data);
+/* Nested attributes: every start must be matched by a finish. */
+int nlattr_nested_start(struct nlmsg *msg, uint16_t type);
+void nlattr_nested_finish(struct nlmsg *msg);
+
+#endif /* _TAP_NETLINK_H_ */
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <net/if.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <tap_tcmsgs.h>
+
+/* Identification of a QDISC on an interface, as used by qdisc_del(). */
+struct qdisc {
+ /* Copied into tcmsg.tcm_handle of the request. */
+ uint32_t handle;
+ /* Copied into tcmsg.tcm_parent of the request. */
+ uint32_t parent;
+};
+
+/* Context handed by qdisc_iterate() to the per-message callbacks. */
+struct list_args {
+ /* Netlink socket the dump request was sent on. */
+ int nlsk_fd;
+ /* Interface index the callbacks filter the answers on. */
+ uint16_t ifindex;
+ /* Callback-specific data, passed through untouched by qdisc_iterate(). */
+ void *custom_arg;
+};
+
+/* Search criteria and result for qdisc_exist_cb(). */
+struct qdisc_custom_arg {
+ /* Handle the QDISC must have to be counted. */
+ uint32_t handle;
+ /* Parent the QDISC must have to be counted. */
+ uint32_t parent;
+ /* Incremented by qdisc_exist_cb() for every matching QDISC. */
+ uint8_t exists;
+};
+
+/**
+ * Prepare the netlink and TC headers of a message.
+ *
+ * Only nlmsg_len, nlmsg_type, nlmsg_flags, tcm_family and tcm_ifindex are
+ * written; every other field of the message is left as the caller provided
+ * it.
+ *
+ * @param[in, out] msg
+ *   The netlink message to fill.
+ * @param[in] ifindex
+ *   The netdevice ifindex where the rule will be applied.
+ * @param[in] type
+ *   The type of TC message to create (RTM_NEWTFILTER, RTM_NEWQDISC, etc.).
+ * @param[in] flags
+ *   Netlink flags for this message. When 0, NLM_F_REQUEST | NLM_F_ACK is used
+ *   instead.
+ */
+void
+tc_init_msg(struct nlmsg *msg, uint16_t ifindex, uint16_t type, uint16_t flags)
+{
+	struct nlmsghdr *hdr = &msg->nh;
+	struct tcmsg *tc = &msg->t;
+
+	/* The message starts out holding the TC header and no attribute. */
+	hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+	hdr->nlmsg_type = type;
+	hdr->nlmsg_flags = flags ? flags : (NLM_F_REQUEST | NLM_F_ACK);
+	tc->tcm_family = AF_UNSPEC;
+	tc->tcm_ifindex = ifindex;
+}
+
+/**
+ * Delete a specific QDISC identified by its iface, and its handle and parent.
+ *
+ * When nlsk_fd is 0, a private netlink socket is opened for the request and
+ * closed again before returning, whatever the outcome.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication, or 0 to have
+ *   a temporary one created.
+ * @param[in] ifindex
+ *   The netdevice ifindex on which the deletion will happen.
+ * @param[in] qinfo
+ *   Additional info to identify the QDISC (handle and parent).
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+static int
+qdisc_del(int nlsk_fd, uint16_t ifindex, struct qdisc *qinfo)
+{
+	struct nlmsg msg;
+	int fd = nlsk_fd;
+	int ret = -1;
+
+	/* tc_init_msg() only sets a few fields: start from a clean message. */
+	memset(&msg, 0, sizeof(msg));
+	tc_init_msg(&msg, ifindex, RTM_DELQDISC, 0);
+	msg.t.tcm_handle = qinfo->handle;
+	msg.t.tcm_parent = qinfo->parent;
+	/* if no netlink socket is provided, create one */
+	if (!nlsk_fd) {
+		fd = nl_init();
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD,
+				"Could not delete QDISC: null netlink socket\n");
+			return -1;
+		}
+	}
+	if (nl_send(fd, &msg.nh) < 0)
+		goto out;
+	if (nl_recv_ack(fd) < 0)
+		goto out;
+	ret = 0;
+out:
+	/* Release the private socket on every path, not only on success. */
+	if (!nlsk_fd && nl_final(fd) < 0)
+		ret = -1;
+	return ret;
+}
+
+/**
+ * Add the multiqueue QDISC with MULTIQ_MAJOR_HANDLE handle.
+ *
+ * The QDISC is created at the root of the interface, with kind "multiq" and
+ * zeroed options.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where to add the multiqueue QDISC.
+ *
+ * @return
+ *   -1 if the qdisc cannot be added, and 0 otherwise.
+ */
+int
+qdisc_add_multiq(int nlsk_fd, uint16_t ifindex)
+{
+	struct tc_multiq_qopt opt;
+	struct nlmsg msg;
+
+	/* Never send uninitialized stack content to the kernel. */
+	memset(&opt, 0, sizeof(opt));
+	memset(&msg, 0, sizeof(msg));
+	tc_init_msg(&msg, ifindex, RTM_NEWQDISC,
+		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+	msg.t.tcm_handle = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
+	msg.t.tcm_parent = TC_H_ROOT;
+	nlattr_add(&msg.nh, TCA_KIND, sizeof("multiq"), "multiq");
+	nlattr_add(&msg.nh, TCA_OPTIONS, sizeof(opt), &opt);
+	if (nl_send(nlsk_fd, &msg.nh) < 0)
+		return -1;
+	if (nl_recv_ack(nlsk_fd) < 0)
+		return -1;
+	return 0;
+}
+
+/**
+ * Add the ingress QDISC with default ffff: handle.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where the QDISC will be added.
+ *
+ * @return
+ *   -1 if the qdisc cannot be added, and 0 otherwise.
+ */
+int
+qdisc_add_ingress(int nlsk_fd, uint16_t ifindex)
+{
+	struct nlmsg msg;
+
+	/* tc_init_msg() only sets a few fields: start from a clean message. */
+	memset(&msg, 0, sizeof(msg));
+	tc_init_msg(&msg, ifindex, RTM_NEWQDISC,
+		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+	msg.t.tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	msg.t.tcm_parent = TC_H_INGRESS;
+	nlattr_add(&msg.nh, TCA_KIND, sizeof("ingress"), "ingress");
+	if (nl_send(nlsk_fd, &msg.nh) < 0)
+		return -1;
+	if (nl_recv_ack(nlsk_fd) < 0)
+		return -1;
+	return 0;
+}
+
+/**
+ * Callback counting the QDISCs that match the one being looked for.
+ * A message matches when its interface index, handle and parent are all
+ * equal to the requested ones; "exists" in the custom arg is then
+ * incremented.
+ *
+ * @param[in] nh
+ *   The netlink message to parse, received from the kernel.
+ * @param[in, out] arg
+ *   The struct list_args of the iteration; its custom_arg points to a
+ *   struct qdisc_custom_arg.
+ *
+ * @return
+ *   0.
+ */
+static int
+qdisc_exist_cb(struct nlmsghdr *nh, void *arg)
+{
+	struct list_args *largs = arg;
+	struct qdisc_custom_arg *wanted = largs->custom_arg;
+	struct tcmsg *tc = NLMSG_DATA(nh);
+
+	if (largs->ifindex == (unsigned int)tc->tcm_ifindex &&
+	    tc->tcm_handle == wanted->handle &&
+	    tc->tcm_parent == wanted->parent)
+		wanted->exists++;
+	return 0;
+}
+
+/**
+ * Callback deleting the QDISC described by a netlink message, provided it
+ * belongs to the interface being iterated on.
+ *
+ * @param[in] nh
+ *   The netlink message to parse, received from the kernel.
+ * @param[in] arg
+ *   The struct list_args of the iteration.
+ *
+ * @return
+ *   0 when the message is about another interface, the result of
+ *   qdisc_del() otherwise.
+ */
+static int
+qdisc_del_cb(struct nlmsghdr *nh, void *arg)
+{
+	struct list_args *largs = arg;
+	struct tcmsg *tc = NLMSG_DATA(nh);
+	struct qdisc target;
+
+	/* filter out other ifaces' qdiscs */
+	if (largs->ifindex != (unsigned int)tc->tcm_ifindex)
+		return 0;
+	target.handle = tc->tcm_handle;
+	target.parent = tc->tcm_parent;
+	/*
+	 * Use another nlsk_fd (0) to avoid tampering with the current list
+	 * iteration.
+	 */
+	return qdisc_del(0, largs->ifindex, &target);
+}
+
+/**
+ * Iterate over all QDISC, and call the callback() function for each.
+ *
+ * A RTM_GETQDISC dump request is sent, and every answer is handed to the
+ * callback together with a struct list_args wrapping the socket, the ifindex
+ * and the caller's argument.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where to find QDISCs.
+ * @param[in] callback
+ *   The function to call for each QDISC.
+ * @param[in, out] arg
+ *   The arguments to provide the callback function with (available to it as
+ *   list_args.custom_arg).
+ *
+ * @return
+ *   -1 if either sending the netlink message failed, or if receiving the answer
+ *   failed, or finally if the callback returned a negative value for that
+ *   answer.
+ *   0 is returned otherwise.
+ */
+static int
+qdisc_iterate(int nlsk_fd, uint16_t ifindex,
+	      int (*callback)(struct nlmsghdr *, void *), void *arg)
+{
+	struct nlmsg msg;
+	struct list_args args = {
+		.nlsk_fd = nlsk_fd,
+		.ifindex = ifindex,
+		.custom_arg = arg,
+	};
+
+	/* tc_init_msg() only sets a few fields: start from a clean message. */
+	memset(&msg, 0, sizeof(msg));
+	tc_init_msg(&msg, ifindex, RTM_GETQDISC, NLM_F_REQUEST | NLM_F_DUMP);
+	if (nl_send(nlsk_fd, &msg.nh) < 0)
+		return -1;
+	if (nl_recv(nlsk_fd, callback, &args) < 0)
+		return -1;
+	return 0;
+}
+
+/**
+ * Check whether a given QDISC already exists for the netdevice.
+ *
+ * The result of the underlying lookup is not checked: a lookup that fails
+ * before any match is counted is reported as "does not exist".
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex to check QDISC existence for.
+ * @param[in] handle
+ *   The handle of the QDISC to look for.
+ * @param[in] parent
+ *   The parent of the QDISC to look for.
+ *
+ * @return
+ *   1 if the qdisc exists, 0 otherwise.
+ */
+int
+qdisc_exists(int nlsk_fd, uint16_t ifindex, uint32_t handle, uint32_t parent)
+{
+	struct qdisc_custom_arg lookup = {
+		.handle = handle,
+		.parent = parent,
+		.exists = 0,
+	};
+
+	qdisc_iterate(nlsk_fd, ifindex, qdisc_exist_cb, &lookup);
+	return lookup.exists ? 1 : 0;
+}
+
+/**
+ * Delete all QDISCs for a given netdevice.
+ *
+ * Every QDISC reported for the interface is handed to qdisc_del_cb().
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where to find QDISCs.
+ *
+ * @return
+ *   -1 if the iteration failed (see qdisc_iterate()), 0 otherwise.
+ */
+int
+qdisc_flush(int nlsk_fd, uint16_t ifindex)
+{
+	int ret;
+
+	ret = qdisc_iterate(nlsk_fd, ifindex, qdisc_del_cb, NULL);
+	return ret;
+}
+
+/**
+ * Make sure the multiqueue QDISC is present, creating it when it is missing.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where to add the multiqueue QDISC.
+ *
+ * @return
+ *   0 if the qdisc already exists or has been successfully added,
+ *   -1 otherwise.
+ */
+int
+qdisc_create_multiq(int nlsk_fd, uint16_t ifindex)
+{
+	/* Already there: nothing to do. */
+	if (qdisc_exists(nlsk_fd, ifindex,
+			 TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0), TC_H_ROOT))
+		return 0;
+	if (qdisc_add_multiq(nlsk_fd, ifindex) < 0) {
+		RTE_LOG(ERR, PMD, "Could not add multiq qdisc\n");
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Make sure the ingress QDISC is present, creating it when it is missing.
+ *
+ * @param[in] nlsk_fd
+ *   The netlink socket file descriptor used for communication.
+ * @param[in] ifindex
+ *   The netdevice ifindex where to add the ingress QDISC.
+ *
+ * @return
+ *   0 if the qdisc already exists or has been successfully added,
+ *   -1 otherwise.
+ */
+int
+qdisc_create_ingress(int nlsk_fd, uint16_t ifindex)
+{
+	/* Already there: nothing to do. */
+	if (qdisc_exists(nlsk_fd, ifindex,
+			 TC_H_MAKE(TC_H_INGRESS, 0), TC_H_INGRESS))
+		return 0;
+	if (qdisc_add_ingress(nlsk_fd, ifindex) < 0) {
+		RTE_LOG(ERR, PMD, "Could not add ingress qdisc\n");
+		return -1;
+	}
+	return 0;
+}
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TAP_TCMSGS_H_
+#define _TAP_TCMSGS_H_
+
+#include <linux/if_ether.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_skbedit.h>
+#include <inttypes.h>
+
+#include <rte_ether.h>
+#include <tap_netlink.h>
+
+/*
+ * Major part of the handle given to the multiq QDISC; combined with a minor
+ * of 0 through TC_H_MAKE() when the QDISC is added or looked up.
+ */
+#define MULTIQ_MAJOR_HANDLE (1 << 16)
+
+/* Fill the netlink and TC headers of a message about to be built. */
+void tc_init_msg(struct nlmsg *msg, uint16_t ifindex, uint16_t type,
+ uint16_t flags);
+/* QDISC lookup, removal and creation on a given interface. */
+int qdisc_exists(int nlsk_fd, uint16_t ifindex, uint32_t handle,
+ uint32_t parent);
+/* NOTE(review): no definition of qdisc_list() is visible -- confirm it exists. */
+int qdisc_list(int nlsk_fd, uint16_t ifindex);
+int qdisc_flush(int nlsk_fd, uint16_t ifindex);
+int qdisc_create_ingress(int nlsk_fd, uint16_t ifindex);
+int qdisc_create_multiq(int nlsk_fd, uint16_t ifindex);
+int qdisc_add_ingress(int nlsk_fd, uint16_t ifindex);
+int qdisc_add_multiq(int nlsk_fd, uint16_t ifindex);
+/* NOTE(review): no definition of filter_list_ingress() is visible -- confirm. */
+int filter_list_ingress(int nlsk_fd, uint16_t ifindex);
+
+#endif /* _TAP_TCMSGS_H_ */