net/tap: add multicast addresses management
[dpdk.git] / drivers / net / tap / rte_eth_tap.c
index c0afc2d..0a68481 100644 (file)
@@ -31,6 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rte_atomic.h>
+#include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
 #include <unistd.h>
-#include <poll.h>
 #include <arpa/inet.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
 #define ETH_TAP_IFACE_ARG       "iface"
 #define ETH_TAP_SPEED_ARG       "speed"
 
+#ifdef IFF_MULTI_QUEUE
 #define RTE_PMD_TAP_MAX_QUEUES 16
+#else
+#define RTE_PMD_TAP_MAX_QUEUES 1
+#endif
 
 static struct rte_vdev_driver pmd_tap_drv;
 
@@ -69,6 +77,8 @@ static const char *valid_arguments[] = {
 
 static int tap_unit;
 
+static volatile uint32_t tap_trigger;  /* Rx trigger */
+
 static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -86,6 +96,7 @@ struct pkt_stats {
 
 struct rx_queue {
        struct rte_mempool *mp;         /* Mempool for RX packets */
+       uint32_t trigger_seen;          /* Last seen Rx trigger value */
        uint16_t in_port;               /* Port ID */
        int fd;
 
@@ -103,29 +114,43 @@ struct pmd_internals {
        struct ether_addr eth_addr;     /* Mac address of the device port */
 
        int if_index;                   /* IF_INDEX for the port */
-       int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+       int ioctl_sock;                 /* socket for ioctl calls */
 
        struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];    /* List of RX queues */
        struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];    /* List of TX queues */
 };
 
+/*
+ * Signal handler installed for SIGIO by tun_alloc(): advance the global Rx
+ * trigger so that Rx queues comparing it with their last seen value notice
+ * that it changed.
+ */
+static void
+tap_trigger_cb(int sig __rte_unused)
+{
+       uint32_t next = tap_trigger + 1;
+
+       /* Keep the top bit set: zero means "trigger disabled" */
+       tap_trigger = next | 0x80000000;
+}
+
+static int
+tap_ioctl(struct pmd_internals *pmd, unsigned long request,
+         struct ifreq *ifr, int set);
+
 /* Tun/Tap allocation routine
  *
  * name is the number of the interface to use, unless NULL to take the host
  * supplied name.
  */
 static int
-tun_alloc(char *name)
+tun_alloc(struct pmd_internals *pmd, uint16_t qid)
 {
        struct ifreq ifr;
+#ifdef IFF_MULTI_QUEUE
        unsigned int features;
+#endif
        int fd;
 
        memset(&ifr, 0, sizeof(struct ifreq));
 
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-       if (name && name[0])
-               strncpy(ifr.ifr_name, name, IFNAMSIZ);
+       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
+
+       RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
 
        fd = open(TUN_TAP_DEV_PATH, O_RDWR);
        if (fd < 0) {
@@ -133,39 +158,29 @@ tun_alloc(char *name)
                goto error;
        }
 
-       /* Grab the TUN features to verify we can work */
+#ifdef IFF_MULTI_QUEUE
+       /* Grab the TUN features to verify we can work multi-queue */
        if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
-               RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+               RTE_LOG(ERR, PMD, "TAP unable to get TUN/TAP features\n");
                goto error;
        }
-       RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+       RTE_LOG(DEBUG, PMD, "  TAP Features %08x\n", features);
 
-#ifdef IFF_MULTI_QUEUE
-       if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
-               RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
-               goto error;
-       } else if ((features & IFF_ONE_QUEUE) &&
-                       (RTE_PMD_TAP_MAX_QUEUES == 1)) {
-               ifr.ifr_flags |= IFF_ONE_QUEUE;
-               RTE_LOG(DEBUG, PMD, "Single queue only support\n");
-       } else {
-               ifr.ifr_flags |= IFF_MULTI_QUEUE;
-               RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+       if (features & IFF_MULTI_QUEUE) {
+               RTE_LOG(DEBUG, PMD, "  Multi-queue support for %d queues\n",
                        RTE_PMD_TAP_MAX_QUEUES);
-       }
-#else
-       if (RTE_PMD_TAP_MAX_QUEUES > 1) {
-               RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
-               goto error;
-       } else {
+               ifr.ifr_flags |= IFF_MULTI_QUEUE;
+       } else
+#endif
+       {
                ifr.ifr_flags |= IFF_ONE_QUEUE;
-               RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+               RTE_LOG(DEBUG, PMD, "  Single queue only support\n");
        }
-#endif
 
-       /* Set the TUN/TAP configuration and get the name if needed */
+       /* Set the TUN/TAP configuration and set the name if needed */
        if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
-               RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+               RTE_LOG(WARNING, PMD,
+                       "Unable to set TUNSETIFF for %s\n",
                        ifr.ifr_name);
                perror("TUNSETIFF");
                goto error;
@@ -173,14 +188,58 @@ tun_alloc(char *name)
 
        /* Always set the file descriptor to non-blocking */
        if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
-               RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+               RTE_LOG(WARNING, PMD,
+                       "Unable to set %s to nonblocking\n",
+                       ifr.ifr_name);
                perror("F_SETFL, NONBLOCK");
                goto error;
        }
 
-       /* If the name is different that new name as default */
-       if (name && strcmp(name, ifr.ifr_name))
-               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+       /* Set up trigger to optimize empty Rx bursts */
+       errno = 0;
+       do {
+               struct sigaction sa;
+               int flags = fcntl(fd, F_GETFL);
+
+               if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1)
+                       break;
+               if (sa.sa_handler != tap_trigger_cb) {
+                       /*
+                        * Make sure SIGIO is not already taken. This is done
+                        * as late as possible to leave the application a
+                        * chance to set up its own signal handler first.
+                        */
+                       if (sa.sa_handler != SIG_IGN &&
+                           sa.sa_handler != SIG_DFL) {
+                               errno = EBUSY;
+                               break;
+                       }
+                       sa = (struct sigaction){
+                               .sa_flags = SA_RESTART,
+                               .sa_handler = tap_trigger_cb,
+                       };
+                       if (sigaction(SIGIO, &sa, NULL) == -1)
+                               break;
+               }
+               /* Enable SIGIO on file descriptor */
+               fcntl(fd, F_SETFL, flags | O_ASYNC);
+               fcntl(fd, F_SETOWN, getpid());
+       } while (0);
+       if (errno) {
+               /* Disable trigger globally in case of error */
+               tap_trigger = 0;
+               RTE_LOG(WARNING, PMD, "Rx trigger disabled: %s\n",
+                       strerror(errno));
+       }
+
+       if (qid == 0) {
+               struct ifreq ifr;
+
+               if (tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0) < 0)
+                       goto error;
+               rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data,
+                          ETHER_ADDR_LEN);
+       }
 
        return fd;
 
@@ -201,12 +260,18 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct rx_queue *rxq = queue;
        uint16_t num_rx;
        unsigned long num_rx_bytes = 0;
+       uint32_t trigger = tap_trigger;
 
+       if (trigger == rxq->trigger_seen)
+               return 0;
+       if (trigger)
+               rxq->trigger_seen = trigger;
+       rte_compiler_barrier();
        for (num_rx = 0; num_rx < nb_pkts; ) {
                /* allocate the next mbuf */
                mbuf = rte_pktmbuf_alloc(rxq->mp);
                if (unlikely(!mbuf)) {
-                       RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+                       RTE_LOG(WARNING, PMD, "TAP unable to allocate mbuf\n");
                        break;
                }
 
@@ -238,7 +303,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        struct rte_mbuf *mbuf;
        struct tx_queue *txq = queue;
-       struct pollfd pfd;
        uint16_t num_tx = 0;
        unsigned long num_tx_bytes = 0;
        int i, n;
@@ -246,26 +310,18 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        if (unlikely(nb_pkts == 0))
                return 0;
 
-       pfd.events = POLLOUT;
-       pfd.fd = txq->fd;
        for (i = 0; i < nb_pkts; i++) {
-               n = poll(&pfd, 1, 0);
-
+               /* copy the tx frame data */
+               mbuf = bufs[num_tx];
+               n = write(txq->fd,
+                         rte_pktmbuf_mtod(mbuf, void *),
+                         rte_pktmbuf_pkt_len(mbuf));
                if (n <= 0)
                        break;
 
-               if (pfd.revents & POLLOUT) {
-                       /* copy the tx frame data */
-                       mbuf = bufs[num_tx];
-                       n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
-                                 rte_pktmbuf_pkt_len(mbuf));
-                       if (n <= 0)
-                               break;
-
-                       num_tx++;
-                       num_tx_bytes += mbuf->pkt_len;
-                       rte_pktmbuf_free(mbuf);
-               }
+               num_tx++;
+               num_tx_bytes += mbuf->pkt_len;
+               rte_pktmbuf_free(mbuf);
        }
 
        txq->stats.opackets += num_tx;
@@ -276,12 +332,65 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 }
 
 static int
-tap_dev_start(struct rte_eth_dev *dev)
+tap_ioctl(struct pmd_internals *pmd, unsigned long request,
+         struct ifreq *ifr, int set)
+{
+       short req_flags = ifr->ifr_flags;
+
+       snprintf(ifr->ifr_name, IFNAMSIZ, "%s", pmd->name);
+       switch (request) {
+       case SIOCSIFFLAGS:
+               /* fetch current flags to leave other flags untouched */
+               if (ioctl(pmd->ioctl_sock, SIOCGIFFLAGS, ifr) < 0)
+                       goto error;
+               if (set)
+                       ifr->ifr_flags |= req_flags;
+               else
+                       ifr->ifr_flags &= ~req_flags;
+               break;
+       case SIOCGIFHWADDR:
+       case SIOCSIFHWADDR:
+       case SIOCSIFMTU:
+               break;
+       default:
+               RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n",
+                       pmd->name);
+               return -EINVAL;
+       }
+       if (ioctl(pmd->ioctl_sock, request, ifr) < 0)
+               goto error;
+       return 0;
+
+error:
+       RTE_LOG(ERR, PMD, "%s: ioctl(%lu) failed with error: %s\n",
+               ifr->ifr_name, request, strerror(errno));
+       return -errno;
+}
+
+/*
+ * Record the link as down and clear IFF_UP on the interface.
+ * Returns whatever tap_ioctl() returns.
+ */
+static int
+tap_link_set_down(struct rte_eth_dev *dev)
+{
+       struct ifreq ifr = { .ifr_flags = IFF_UP };
+       struct pmd_internals *pmd;
+
+       dev->data->dev_link.link_status = ETH_LINK_DOWN;
+       pmd = dev->data->dev_private;
+       return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0);
+}
+
+/*
+ * Record the link as up and set IFF_UP on the interface through
+ * tap_ioctl(), whose return value is handed back to the caller.
+ */
+static int
+tap_link_set_up(struct rte_eth_dev *dev)
 {
-       /* Force the Link up */
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_UP };
+
        dev->data->dev_link.link_status = ETH_LINK_UP;
+       return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1);
+}
 
-       return 0;
+/* dev_start callback: starting the port amounts to bringing its link up. */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+       return tap_link_set_up(dev);
 }
 
 /* This function gets called when the current port gets stopped.
@@ -289,14 +398,7 @@ tap_dev_start(struct rte_eth_dev *dev)
 static void
 tap_dev_stop(struct rte_eth_dev *dev)
 {
-       int i;
-       struct pmd_internals *internals = dev->data->dev_private;
-
-       for (i = 0; i < internals->nb_queues; i++)
-               if (internals->fds[i] != -1)
-                       close(internals->fds[i]);
-
-       dev->data->dev_link.link_status = ETH_LINK_DOWN;
+       /* Only take the link down; tap_link_set_down()'s result is dropped */
+       tap_link_set_down(dev);
 }
 
 static int
@@ -305,6 +407,40 @@ tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
        return 0;
 }
 
+/*
+ * Build the speed capability bitmap reported by tap_dev_info(): every
+ * ETH_LINK_SPEED_* bit whose rate does not exceed the configured
+ * pmd_link.link_speed is set.
+ */
+static uint32_t
+tap_dev_speed_capa(void)
+{
+       uint32_t speed = pmd_link.link_speed;
+       uint32_t capa = 0;
+
+       if (speed >= ETH_SPEED_NUM_10M)
+               capa |= ETH_LINK_SPEED_10M;
+       if (speed >= ETH_SPEED_NUM_100M)
+               capa |= ETH_LINK_SPEED_100M;
+       if (speed >= ETH_SPEED_NUM_1G)
+               capa |= ETH_LINK_SPEED_1G;
+       /* 2.5G has its own threshold; it must not wait for 5G */
+       if (speed >= ETH_SPEED_NUM_2_5G)
+               capa |= ETH_LINK_SPEED_2_5G;
+       if (speed >= ETH_SPEED_NUM_5G)
+               capa |= ETH_LINK_SPEED_5G;
+       if (speed >= ETH_SPEED_NUM_10G)
+               capa |= ETH_LINK_SPEED_10G;
+       if (speed >= ETH_SPEED_NUM_20G)
+               capa |= ETH_LINK_SPEED_20G;
+       if (speed >= ETH_SPEED_NUM_25G)
+               capa |= ETH_LINK_SPEED_25G;
+       if (speed >= ETH_SPEED_NUM_40G)
+               capa |= ETH_LINK_SPEED_40G;
+       if (speed >= ETH_SPEED_NUM_50G)
+               capa |= ETH_LINK_SPEED_50G;
+       if (speed >= ETH_SPEED_NUM_56G)
+               capa |= ETH_LINK_SPEED_56G;
+       if (speed >= ETH_SPEED_NUM_100G)
+               capa |= ETH_LINK_SPEED_100G;
+
+       return capa;
+}
+
 static void
 tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -317,6 +453,7 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_tx_queues = internals->nb_queues;
        dev_info->min_rx_bufsize = 0;
        dev_info->pci_dev = NULL;
+       dev_info->speed_capa = tap_dev_speed_capa();
 }
 
 static void
@@ -335,9 +472,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
                tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
                rx_total += tap_stats->q_ipackets[i];
                rx_bytes_total += tap_stats->q_ibytes[i];
-       }
 
-       for (i = 0; i < imax; i++) {
                tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
                tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
                tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
@@ -362,9 +497,7 @@ tap_stats_reset(struct rte_eth_dev *dev)
        for (i = 0; i < pmd->nb_queues; i++) {
                pmd->rxq[i].stats.ipackets = 0;
                pmd->rxq[i].stats.ibytes = 0;
-       }
 
-       for (i = 0; i < pmd->nb_queues; i++) {
                pmd->txq[i].stats.opackets = 0;
                pmd->txq[i].stats.errs = 0;
                pmd->txq[i].stats.obytes = 0;
@@ -374,6 +507,17 @@ tap_stats_reset(struct rte_eth_dev *dev)
 static void
 tap_dev_close(struct rte_eth_dev *dev __rte_unused)
 {
+       int i;
+       struct pmd_internals *internals = dev->data->dev_private;
+
+       tap_link_set_down(dev);
+
+       for (i = 0; i < internals->nb_queues; i++) {
+               if (internals->rxq[i].fd != -1)
+                       close(internals->rxq[i].fd);
+               internals->rxq[i].fd = -1;
+               internals->txq[i].fd = -1;
+       }
 }
 
 static void
@@ -405,11 +549,72 @@ tap_link_update(struct rte_eth_dev *dev __rte_unused,
        return 0;
 }
 
+/*
+ * Flag the port as promiscuous, then set IFF_PROMISC on the interface.
+ * The result of tap_ioctl() is not propagated.
+ */
+static void
+tap_promisc_enable(struct rte_eth_dev *dev)
+{
+       struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+
+       dev->data->promiscuous = 1;
+       tap_ioctl(dev->data->dev_private, SIOCSIFFLAGS, &ifr, 1);
+}
+
+/*
+ * Flag the port as non-promiscuous, then clear IFF_PROMISC on the
+ * interface. The result of tap_ioctl() is not propagated.
+ */
+static void
+tap_promisc_disable(struct rte_eth_dev *dev)
+{
+       struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+
+       dev->data->promiscuous = 0;
+       tap_ioctl(dev->data->dev_private, SIOCSIFFLAGS, &ifr, 0);
+}
+
+/*
+ * Flag the port as all-multicast, then set IFF_ALLMULTI on the interface.
+ * The result of tap_ioctl() is not propagated.
+ */
+static void
+tap_allmulti_enable(struct rte_eth_dev *dev)
+{
+       struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+
+       dev->data->all_multicast = 1;
+       tap_ioctl(dev->data->dev_private, SIOCSIFFLAGS, &ifr, 1);
+}
+
+/*
+ * Clear the port's all-multicast flag, then clear IFF_ALLMULTI on the
+ * interface. The result of tap_ioctl() is not propagated.
+ */
+static void
+tap_allmulti_disable(struct rte_eth_dev *dev)
+{
+       struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+
+       dev->data->all_multicast = 0;
+       tap_ioctl(dev->data->dev_private, SIOCSIFFLAGS, &ifr, 0);
+}
+
+
+/*
+ * Change the MAC address of the interface.
+ *
+ * An all-zero address is rejected with an error log. Otherwise the address
+ * is submitted through SIOCSIFHWADDR and, only if that succeeds, cached in
+ * pmd->eth_addr. Failures are not reported to the caller.
+ */
+static void
+tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr;
+
+       if (is_zero_ether_addr(mac_addr)) {
+               RTE_LOG(ERR, PMD, "%s: can't set an empty MAC address\n",
+                       dev->data->name);
+               return;
+       }
+
+       /* Do not hand uninitialized stack bytes to the ioctl */
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_hwaddr.sa_family = AF_LOCAL;
+       rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN);
+       if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 1) < 0)
+               return;
+       rte_memcpy(&pmd->eth_addr, mac_addr, ETHER_ADDR_LEN);
+}
+
 static int
 tap_setup_queue(struct rte_eth_dev *dev,
                struct pmd_internals *internals,
                uint16_t qid)
 {
+       struct pmd_internals *pmd = dev->data->dev_private;
        struct rx_queue *rx = &internals->rxq[qid];
        struct tx_queue *tx = &internals->txq[qid];
        int fd;
@@ -419,17 +624,24 @@ tap_setup_queue(struct rte_eth_dev *dev,
                fd = tx->fd;
                if (fd < 0) {
                        RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
-                               dev->data->name, qid);
-                       fd = tun_alloc(dev->data->name);
+                               pmd->name, qid);
+                       fd = tun_alloc(pmd, qid);
                        if (fd < 0) {
-                               RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
-                                       dev->data->name);
+                               RTE_LOG(ERR, PMD, "tun_alloc(%s, %d) failed\n",
+                                       pmd->name, qid);
                                return -1;
                        }
+                       if (qid == 0) {
+                               struct ifreq ifr;
+
+                               ifr.ifr_mtu = dev->data->mtu;
+                               if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1) < 0) {
+                                       close(fd);
+                                       return -1;
+                               }
+                       }
                }
        }
-       dev->data->rx_queues[qid] = rx;
-       dev->data->tx_queues[qid] = tx;
 
        rx->fd = fd;
        tx->fd = fd;
@@ -437,6 +649,26 @@ tap_setup_queue(struct rte_eth_dev *dev,
        return fd;
 }
 
+/*
+ * Publish Rx queue qid in dev->data->rx_queues, then hand over to
+ * tap_setup_queue(), whose return value is passed back unchanged.
+ */
+static int
+rx_setup_queue(struct rte_eth_dev *dev,
+               struct pmd_internals *internals,
+               uint16_t qid)
+{
+       struct rx_queue *rxq = &internals->rxq[qid];
+
+       dev->data->rx_queues[qid] = rxq;
+       return tap_setup_queue(dev, internals, qid);
+}
+
+/*
+ * Publish Tx queue qid in dev->data->tx_queues, then hand over to
+ * tap_setup_queue(), whose return value is passed back unchanged.
+ */
+static int
+tx_setup_queue(struct rte_eth_dev *dev,
+               struct pmd_internals *internals,
+               uint16_t qid)
+{
+       struct tx_queue *txq = &internals->txq[qid];
+
+       dev->data->tx_queues[qid] = txq;
+       return tap_setup_queue(dev, internals, qid);
+}
+
 static int
 tap_rx_queue_setup(struct rte_eth_dev *dev,
                   uint16_t rx_queue_id,
@@ -450,12 +682,14 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        int fd;
 
        if ((rx_queue_id >= internals->nb_queues) || !mp) {
-               RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
-                       internals->nb_queues, mp);
+               RTE_LOG(WARNING, PMD,
+                       "nb_queues %d too small or mempool NULL\n",
+                       internals->nb_queues);
                return -1;
        }
 
        internals->rxq[rx_queue_id].mp = mp;
+       internals->rxq[rx_queue_id].trigger_seen = 1; /* force initial burst */
        internals->rxq[rx_queue_id].in_port = dev->data->port_id;
 
        /* Now get the space available for data in the mbuf */
@@ -463,19 +697,18 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
                                RTE_PKTMBUF_HEADROOM);
 
        if (buf_size < ETH_FRAME_LEN) {
-               RTE_LOG(ERR, PMD,
+               RTE_LOG(WARNING, PMD,
                        "%s: %d bytes will not fit in mbuf (%d bytes)\n",
                        dev->data->name, ETH_FRAME_LEN, buf_size);
                return -ENOMEM;
        }
 
-       fd = tap_setup_queue(dev, internals, rx_queue_id);
+       fd = rx_setup_queue(dev, internals, rx_queue_id);
        if (fd == -1)
                return -1;
 
-       internals->fds[rx_queue_id] = fd;
-       RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
-               dev->data->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
+       RTE_LOG(DEBUG, PMD, "  RX TAP device name %s, qid %d on fd %d\n",
+               internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
 
        return 0;
 }
@@ -493,16 +726,42 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
        if (tx_queue_id >= internals->nb_queues)
                return -1;
 
-       ret = tap_setup_queue(dev, internals, tx_queue_id);
+       ret = tx_setup_queue(dev, internals, tx_queue_id);
        if (ret == -1)
                return -1;
 
-       RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
-               dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+       RTE_LOG(DEBUG, PMD, "  TX TAP device name %s, qid %d on fd %d\n",
+               internals->name, tx_queue_id, internals->txq[tx_queue_id].fd);
 
        return 0;
 }
 
+/*
+ * Apply a new MTU to the interface through SIOCSIFMTU. dev->data->mtu is
+ * updated only when tap_ioctl() reports success; its result is returned.
+ */
+static int
+tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct ifreq ifr = { .ifr_mtu = mtu };
+       int ret;
+
+       ret = tap_ioctl(dev->data->dev_private, SIOCSIFMTU, &ifr, 1);
+       if (ret)
+               return ret;
+       dev->data->mtu = mtu;
+       return 0;
+}
+
+/*
+ * Multicast address list update. The tap has no filtering whatsoever and
+ * receives every packet, so there is nothing to program: always succeeds.
+ */
+static int
+tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused,
+                    struct ether_addr *mc_addr_set __rte_unused,
+                    uint32_t nb_mc_addr __rte_unused)
+{
+       return 0;
+}
+
 static const struct eth_dev_ops ops = {
        .dev_start              = tap_dev_start,
        .dev_stop               = tap_dev_stop,
@@ -514,53 +773,19 @@ static const struct eth_dev_ops ops = {
        .rx_queue_release       = tap_rx_queue_release,
        .tx_queue_release       = tap_tx_queue_release,
        .link_update            = tap_link_update,
+       .dev_set_link_up        = tap_link_set_up,
+       .dev_set_link_down      = tap_link_set_down,
+       .promiscuous_enable     = tap_promisc_enable,
+       .promiscuous_disable    = tap_promisc_disable,
+       .allmulticast_enable    = tap_allmulti_enable,
+       .allmulticast_disable   = tap_allmulti_disable,
+       .mac_addr_set           = tap_mac_set,
+       .mtu_set                = tap_mtu_set,
+       .set_mc_addr_list       = tap_set_mc_addr_list,
        .stats_get              = tap_stats_get,
        .stats_reset            = tap_stats_reset,
 };
 
-static int
-pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
-{
-       struct ifreq ifr;
-
-       if ((fd <= 0) || !dev || !addr)
-               return -1;
-
-       memset(&ifr, 0, sizeof(ifr));
-
-       if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
-               RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
-                       ifr.ifr_name);
-               return -1;
-       }
-
-       /* Set the host based MAC address to this special MAC format */
-       ifr.ifr_hwaddr.sa_data[0] = 'T';
-       ifr.ifr_hwaddr.sa_data[1] = 'a';
-       ifr.ifr_hwaddr.sa_data[2] = 'p';
-       ifr.ifr_hwaddr.sa_data[3] = '-';
-       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
-       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
-       if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
-               RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
-                       dev->data->name, ifr.ifr_name);
-               return -1;
-       }
-
-       /* Set the local application MAC address, needs to be different then
-        * the host based MAC address.
-        */
-       ifr.ifr_hwaddr.sa_data[0] = 'd';
-       ifr.ifr_hwaddr.sa_data[1] = 'n';
-       ifr.ifr_hwaddr.sa_data[2] = 'e';
-       ifr.ifr_hwaddr.sa_data[3] = 't';
-       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
-       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
-       rte_memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
-
-       return 0;
-}
-
 static int
 eth_dev_tap_create(const char *name, char *tap_name)
 {
@@ -568,28 +793,27 @@ eth_dev_tap_create(const char *name, char *tap_name)
        struct rte_eth_dev *dev = NULL;
        struct pmd_internals *pmd = NULL;
        struct rte_eth_dev_data *data = NULL;
-       int i, fd = -1;
+       int i;
 
-       RTE_LOG(INFO, PMD,
-               "%s: Create TAP Ethernet device with %d queues on numa %u\n",
-                name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+       RTE_LOG(DEBUG, PMD, "  TAP device on numa %u\n", rte_socket_id());
 
        data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
        if (!data) {
-               RTE_LOG(INFO, PMD, "Failed to allocate data\n");
+               RTE_LOG(ERR, PMD, "TAP Failed to allocate data\n");
                goto error_exit;
        }
 
        pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
        if (!pmd) {
-               RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
+               RTE_LOG(ERR, PMD, "TAP Unable to allocate internal struct\n");
                goto error_exit;
        }
 
-       /* Use the name and not the tap_name */
-       dev = rte_eth_dev_allocate(tap_name);
+       /* name in allocation and data->name must be consistent */
+       snprintf(data->name, sizeof(data->name), "%s", name);
+       dev = rte_eth_dev_allocate(name);
        if (!dev) {
-               RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
+               RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
                goto error_exit;
        }
 
@@ -597,9 +821,18 @@ eth_dev_tap_create(const char *name, char *tap_name)
 
        pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
 
+       pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (pmd->ioctl_sock == -1) {
+               RTE_LOG(ERR, PMD,
+                       "TAP Unable to get a socket for management: %s\n",
+                       strerror(errno));
+               goto error_exit;
+       }
+
        /* Setup some default values */
        data->dev_private = pmd;
        data->port_id = dev->data->port_id;
+       data->mtu = dev->data->mtu;
        data->dev_flags = RTE_ETH_DEV_DETACHABLE;
        data->kdrv = RTE_KDRV_NONE;
        data->drv_name = pmd_tap_drv.driver.name;
@@ -615,36 +848,17 @@ eth_dev_tap_create(const char *name, char *tap_name)
        dev->driver = NULL;
        dev->rx_pkt_burst = pmd_rx_burst;
        dev->tx_pkt_burst = pmd_tx_burst;
-       snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
-
-       /* Create the first Tap device */
-       fd = tun_alloc(tap_name);
-       if (fd < 0) {
-               RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
-               goto error_exit;
-       }
 
-       /* Presetup the fds to -1 as being not working */
+       /* Presetup the fds to -1 as being not valid */
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               pmd->fds[i] = -1;
                pmd->rxq[i].fd = -1;
                pmd->txq[i].fd = -1;
        }
 
-       /* Take the TUN/TAP fd and place in the first location */
-       pmd->rxq[0].fd = fd;
-       pmd->txq[0].fd = fd;
-       pmd->fds[0] = fd;
-
-       if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
-               RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
-               goto error_exit;
-       }
-
        return 0;
 
 error_exit:
-       RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+       RTE_LOG(DEBUG, PMD, "TAP Unable to initialize %s\n", name);
 
        rte_free(data);
        rte_free(pmd);
@@ -694,11 +908,8 @@ rte_pmd_tap_probe(const char *name, const char *params)
        snprintf(tap_name, sizeof(tap_name), "%s%d",
                 DEFAULT_TAP_NAME, tap_unit++);
 
-       RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
-               name, tap_name);
-
        if (params && (params[0] != '\0')) {
-               RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+               RTE_LOG(DEBUG, PMD, "paramaters (%s)\n", params);
 
                kvlist = rte_kvargs_parse(params, valid_arguments);
                if (kvlist) {
@@ -723,11 +934,14 @@ rte_pmd_tap_probe(const char *name, const char *params)
        }
        pmd_link.link_speed = speed;
 
+       RTE_LOG(NOTICE, PMD, "Initializing pmd_tap for %s as %s\n",
+               name, tap_name);
+
        ret = eth_dev_tap_create(name, tap_name);
 
 leave:
        if (ret == -1) {
-               RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
+               RTE_LOG(ERR, PMD, "Failed to create pmd for %s as %s\n",
                        name, tap_name);
                tap_unit--;             /* Restore the unit number */
        }
@@ -745,7 +959,7 @@ rte_pmd_tap_remove(const char *name)
        struct pmd_internals *internals;
        int i;
 
-       RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+       RTE_LOG(DEBUG, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
                rte_socket_id());
 
        /* find the ethdev entry */
@@ -755,9 +969,10 @@ rte_pmd_tap_remove(const char *name)
 
        internals = eth_dev->data->dev_private;
        for (i = 0; i < internals->nb_queues; i++)
-               if (internals->fds[i] != -1)
-                       close(internals->fds[i]);
+               if (internals->rxq[i].fd != -1)
+                       close(internals->rxq[i].fd);
 
+       close(internals->ioctl_sock);
        rte_free(eth_dev->data->dev_private);
        rte_free(eth_dev->data);