diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 71be763..37ac18f 100644
@@ -7,8 +7,8 @@
 #include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_mbuf.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_vdev.h>
+#include <ethdev_driver.h>
+#include <ethdev_vdev.h>
 #include <rte_malloc.h>
 #include <rte_bus_vdev.h>
 #include <rte_kvargs.h>
@@ -18,8 +18,8 @@
 #include <rte_string_fns.h>
 #include <rte_ethdev.h>
 #include <rte_errno.h>
+#include <rte_cycles.h>
 
-#include <assert.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
 
 #define TAP_IOV_DEFAULT_MAX 1024
 
+#define TAP_RX_OFFLOAD (RTE_ETH_RX_OFFLOAD_SCATTER |   \
+                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
+                       RTE_ETH_RX_OFFLOAD_UDP_CKSUM |  \
+                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM)
+
+#define TAP_TX_OFFLOAD (RTE_ETH_TX_OFFLOAD_MULTI_SEGS |        \
+                       RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
+                       RTE_ETH_TX_OFFLOAD_UDP_CKSUM |  \
+                       RTE_ETH_TX_OFFLOAD_TCP_CKSUM |  \
+                       RTE_ETH_TX_OFFLOAD_TCP_TSO)
+
 static int tap_devices_count;
-static struct rte_vdev_driver pmd_tap_drv;
-static struct rte_vdev_driver pmd_tun_drv;
+
+static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = {
+       "UNKNOWN", "TUN", "TAP"
+};
 
 static const char *valid_arguments[] = {
        ETH_TAP_IFACE_ARG,
@@ -84,10 +97,10 @@ static const char *valid_arguments[] = {
 static volatile uint32_t tap_trigger;  /* Rx trigger */
 
 static struct rte_eth_link pmd_link = {
-       .link_speed = ETH_SPEED_NUM_10G,
-       .link_duplex = ETH_LINK_FULL_DUPLEX,
-       .link_status = ETH_LINK_DOWN,
-       .link_autoneg = ETH_LINK_FIXED,
+       .link_speed = RTE_ETH_SPEED_NUM_10G,
+       .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
+       .link_status = RTE_ETH_LINK_DOWN,
+       .link_autoneg = RTE_ETH_LINK_FIXED,
 };
 
 static void
@@ -136,7 +149,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive)
 #ifdef IFF_MULTI_QUEUE
        unsigned int features;
 #endif
-       int fd;
+       int fd, signo, flags;
 
        memset(&ifr, 0, sizeof(struct ifreq));
 
@@ -201,52 +214,87 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive)
                }
        }
 
+       flags = fcntl(fd, F_GETFL);
+       if (flags == -1) {
+               TAP_LOG(WARNING,
+                       "Unable to get %s current flags\n",
+                       ifr.ifr_name);
+               goto error;
+       }
+
        /* Always set the file descriptor to non-blocking */
-       if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+       flags |= O_NONBLOCK;
+       if (fcntl(fd, F_SETFL, flags) < 0) {
                TAP_LOG(WARNING,
                        "Unable to set %s to nonblocking: %s",
                        ifr.ifr_name, strerror(errno));
                goto error;
        }
 
-       /* Set up trigger to optimize empty Rx bursts */
-       errno = 0;
-       do {
+       /* Find a free realtime signal */
+       for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) {
                struct sigaction sa;
-               int flags = fcntl(fd, F_GETFL);
 
-               if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1)
+               if (sigaction(signo, NULL, &sa) == -1) {
+                       TAP_LOG(WARNING,
+                               "Unable to get current rt-signal %d handler",
+                               signo);
+                       goto error;
+               }
+
+               /* Already have the handler we want on this signal */
+               if (sa.sa_handler == tap_trigger_cb)
                        break;
-               if (sa.sa_handler != tap_trigger_cb) {
-                       /*
-                        * Make sure SIGIO is not already taken. This is done
-                        * as late as possible to leave the application a
-                        * chance to set up its own signal handler first.
-                        */
-                       if (sa.sa_handler != SIG_IGN &&
-                           sa.sa_handler != SIG_DFL) {
-                               errno = EBUSY;
-                               break;
-                       }
-                       sa = (struct sigaction){
-                               .sa_flags = SA_RESTART,
-                               .sa_handler = tap_trigger_cb,
-                       };
-                       if (sigaction(SIGIO, &sa, NULL) == -1)
-                               break;
+
+               /* Is the handler already in use by the application? */
+               if (sa.sa_handler != SIG_DFL) {
+                       TAP_LOG(DEBUG,
+                               "Skipping used rt-signal %d", signo);
+                       continue;
                }
-               /* Enable SIGIO on file descriptor */
-               fcntl(fd, F_SETFL, flags | O_ASYNC);
-               fcntl(fd, F_SETOWN, getpid());
-       } while (0);
 
-       if (errno) {
+               sa = (struct sigaction) {
+                       .sa_flags = SA_RESTART,
+                       .sa_handler = tap_trigger_cb,
+               };
+
+               if (sigaction(signo, &sa, NULL) == -1) {
+                       TAP_LOG(WARNING,
+                               "Unable to set rt-signal %d handler\n", signo);
+                       goto error;
+               }
+
+               /* Found a good signal to use */
+               TAP_LOG(DEBUG,
+                       "Using rt-signal %d", signo);
+               break;
+       }
+
+       if (signo == SIGRTMAX) {
+               TAP_LOG(WARNING, "All rt-signals are in use\n");
+
                /* Disable trigger globally in case of error */
                tap_trigger = 0;
-               TAP_LOG(WARNING, "Rx trigger disabled: %s",
-                       strerror(errno));
-       }
+               TAP_LOG(NOTICE, "No Rx trigger signal available\n");
+       } else {
+               /* Enable signal on file descriptor */
+               if (fcntl(fd, F_SETSIG, signo) < 0) {
+                       TAP_LOG(WARNING, "Unable to set signo %d for fd %d: %s",
+                               signo, fd, strerror(errno));
+                       goto error;
+               }
+               if (fcntl(fd, F_SETFL, flags | O_ASYNC) < 0) {
+                       TAP_LOG(WARNING, "Unable to set fcntl flags: %s",
+                               strerror(errno));
+                       goto error;
+               }
 
+               if (fcntl(fd, F_SETOWN, getpid()) < 0) {
+                       TAP_LOG(WARNING, "Unable to set fcntl owner: %s",
+                               strerror(errno));
+                       goto error;
+               }
+       }
        return fd;
 
 error:
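
A standalone sketch of the Rx-trigger pattern the hunk above establishes:
probe SIGRTMIN+1..SIGRTMAX-1 for a signal whose handler is still SIG_DFL,
install the trigger handler on it, then route the tap fd's readiness to that
signal. Illustrative only, assuming Linux (F_SETSIG needs _GNU_SOURCE);
trigger_cb stands in for the driver's tap_trigger_cb:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <signal.h>
    #include <unistd.h>

    static volatile sig_atomic_t rx_trigger;

    static void trigger_cb(int sig) { (void)sig; rx_trigger = 1; }

    static int arm_rx_signal(int fd)
    {
            int signo;

            for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) {
                    struct sigaction sa;

                    if (sigaction(signo, NULL, &sa) == -1)
                            return -1;
                    if (sa.sa_handler == trigger_cb)
                            break;          /* already ours (another queue) */
                    if (sa.sa_handler != SIG_DFL)
                            continue;       /* taken by the application */
                    sa = (struct sigaction) {
                            .sa_flags = SA_RESTART,
                            .sa_handler = trigger_cb,
                    };
                    if (sigaction(signo, &sa, NULL) == -1)
                            return -1;
                    break;
            }
            if (signo == SIGRTMAX)
                    return -1;              /* every rt-signal is taken */

            /* deliver signo (instead of SIGIO) when fd becomes readable */
            if (fcntl(fd, F_SETSIG, signo) < 0 ||
                fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC) < 0 ||
                fcntl(fd, F_SETOWN, getpid()) < 0)
                    return -1;
            return signo;
    }
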
@@ -266,21 +314,21 @@ tap_verify_csum(struct rte_mbuf *mbuf)
        uint16_t cksum = 0;
        void *l3_hdr;
        void *l4_hdr;
+       struct rte_udp_hdr *udp_hdr;
 
        if (l2 == RTE_PTYPE_L2_ETHER_VLAN)
                l2_len += 4;
        else if (l2 == RTE_PTYPE_L2_ETHER_QINQ)
                l2_len += 8;
        /* Don't verify checksum for packets with discontinuous L2 header */
-       if (unlikely(l2_len + sizeof(struct ipv4_hdr) >
+       if (unlikely(l2_len + sizeof(struct rte_ipv4_hdr) >
                     rte_pktmbuf_data_len(mbuf)))
                return;
        l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len);
        if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) {
-               struct ipv4_hdr *iph = l3_hdr;
+               struct rte_ipv4_hdr *iph = l3_hdr;
 
-               /* ihl contains the number of 4-byte words in the header */
-               l3_len = 4 * (iph->version_ihl & 0xf);
+               l3_len = rte_ipv4_hdr_len(iph);
                if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf)))
                        return;
                /* check that the total length reported by header is not
@@ -292,12 +340,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
 
                cksum = ~rte_raw_cksum(iph, l3_len);
                mbuf->ol_flags |= cksum ?
-                       PKT_RX_IP_CKSUM_BAD :
-                       PKT_RX_IP_CKSUM_GOOD;
+                       RTE_MBUF_F_RX_IP_CKSUM_BAD :
+                       RTE_MBUF_F_RX_IP_CKSUM_GOOD;
        } else if (l3 == RTE_PTYPE_L3_IPV6) {
-               struct ipv6_hdr *iph = l3_hdr;
+               struct rte_ipv6_hdr *iph = l3_hdr;
 
-               l3_len = sizeof(struct ipv6_hdr);
+               l3_len = sizeof(struct rte_ipv6_hdr);
                /* check that the total length reported by header is not
                 * greater than the total received size
                 */
@@ -305,40 +353,59 @@ tap_verify_csum(struct rte_mbuf *mbuf)
                                rte_pktmbuf_data_len(mbuf))
                        return;
        } else {
-               /* IPv6 extensions are not supported */
+               /* - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN cannot happen because
+                *   mbuf->packet_type is filled by rte_net_get_ptype() which
+                *   never returns this value.
+                * - IPv6 extensions are not supported.
+                */
                return;
        }
        if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) {
+               int cksum_ok;
+
                l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len);
                /* Don't verify checksum for multi-segment packets. */
                if (mbuf->nb_segs > 1)
                        return;
-               if (l3 == RTE_PTYPE_L3_IPV4)
-                       cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
-               else if (l3 == RTE_PTYPE_L3_IPV6)
-                       cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
-               mbuf->ol_flags |= cksum ?
-                       PKT_RX_L4_CKSUM_BAD :
-                       PKT_RX_L4_CKSUM_GOOD;
+               if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) {
+                       if (l4 == RTE_PTYPE_L4_UDP) {
+                               udp_hdr = (struct rte_udp_hdr *)l4_hdr;
+                               if (udp_hdr->dgram_cksum == 0) {
+                                       /*
+                                        * For IPv4, a zero UDP checksum
+                                        * indicates that the sender did not
+                                        * generate one [RFC 768].
+                                        */
+                                       mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
+                                       return;
+                               }
+                       }
+                       cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
+                                                                l4_hdr);
+               } else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
+                       cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
+                                                                l4_hdr);
+               }
+               mbuf->ol_flags |= cksum_ok ?
+                       RTE_MBUF_F_RX_L4_CKSUM_GOOD : RTE_MBUF_F_RX_L4_CKSUM_BAD;
        }
 }
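
Two points in the rewritten flag selection above: per RFC 768, a zero UDP
checksum over IPv4 means the sender did not compute one, so the driver now
reports RTE_MBUF_F_RX_L4_CKSUM_NONE instead of guessing good/bad; and the
open-coded "~cksum" test is replaced by the verify helpers. A sketch of the
resulting logic, assuming DPDK >= 21.11 where rte_ipv4_udptcp_cksum_verify()
returns 0 on a valid checksum:

    #include <rte_ip.h>
    #include <rte_udp.h>
    #include <rte_mbuf_core.h>

    static uint64_t
    ipv4_udp_cksum_flag(const struct rte_ipv4_hdr *ip,
                        const struct rte_udp_hdr *udp)
    {
            if (udp->dgram_cksum == 0)      /* RFC 768: not computed */
                    return RTE_MBUF_F_RX_L4_CKSUM_NONE;
            return rte_ipv4_udptcp_cksum_verify(ip, udp) == 0 ?
                    RTE_MBUF_F_RX_L4_CKSUM_GOOD :
                    RTE_MBUF_F_RX_L4_CKSUM_BAD;
    }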
 
-static uint64_t
-tap_rx_offload_get_port_capa(void)
+static void
+tap_rxq_pool_free(struct rte_mbuf *pool)
 {
-       /*
-        * No specific port Rx offload capabilities.
-        */
-       return 0;
-}
+       struct rte_mbuf *mbuf = pool;
+       uint16_t nb_segs = 1;
 
-static uint64_t
-tap_rx_offload_get_queue_capa(void)
-{
-       return DEV_RX_OFFLOAD_SCATTER |
-              DEV_RX_OFFLOAD_IPV4_CKSUM |
-              DEV_RX_OFFLOAD_UDP_CKSUM |
-              DEV_RX_OFFLOAD_TCP_CKSUM;
+       if (mbuf == NULL)
+               return;
+
+       while (mbuf->next) {
+               mbuf = mbuf->next;
+               nb_segs++;
+       }
+       pool->nb_segs = nb_segs;
+       rte_pktmbuf_free(pool);
 }
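
tap_rxq_pool_free() exists because the rxq "pool" is a chain of spare mbufs
linked through ->next for the readv() iovec array, while the head's nb_segs
is never maintained as segments are appended. Recounting before
rte_pktmbuf_free() keeps the free (and the mbuf sanity checks in debug
builds) consistent with the real chain length. Illustration only, 'mp' being
a hypothetical mempool:

    struct rte_mbuf *head = rte_pktmbuf_alloc(mp);
    struct rte_mbuf *spare = rte_pktmbuf_alloc(mp);

    head->next = spare;        /* chained for the readv() iovec array */
    /* head->nb_segs is still 1 here, so instead of rte_pktmbuf_free(): */
    tap_rxq_pool_free(head);   /* recounts nb_segs, then frees the chain */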
 
 /* Callback to handle the rx burst of packets to the correct interface and
@@ -355,10 +422,8 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
        if (trigger == rxq->trigger_seen)
                return 0;
-       if (trigger)
-               rxq->trigger_seen = trigger;
+
        process_private = rte_eth_devices[rxq->in_port].process_private;
-       rte_compiler_barrier();
        for (num_rx = 0; num_rx < nb_pkts; ) {
                struct rte_mbuf *mbuf = rxq->pool;
                struct rte_mbuf *seg = NULL;
@@ -368,7 +433,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
                len = readv(process_private->rxq_fds[rxq->queue_id],
                        *rxq->iovecs,
-                       1 + (rxq->rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ?
+                       1 + (rxq->rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ?
                             rxq->nb_rx_desc : 1));
                if (len < (int)sizeof(struct tun_pi))
                        break;
@@ -393,7 +458,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                        goto end;
 
                                seg->next = NULL;
-                               rte_pktmbuf_free(mbuf);
+                               tap_rxq_pool_free(mbuf);
 
                                goto end;
                        }
@@ -424,7 +489,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                seg->next = NULL;
                mbuf->packet_type = rte_net_get_ptype(mbuf, NULL,
                                                      RTE_PTYPE_ALL_MASK);
-               if (rxq->rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
+               if (rxq->rxmode->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
                        tap_verify_csum(mbuf);
 
                /* account for the receive frame */
@@ -435,26 +500,10 @@ end:
        rxq->stats.ipackets += num_rx;
        rxq->stats.ibytes += num_rx_bytes;
 
-       return num_rx;
-}
-
-static uint64_t
-tap_tx_offload_get_port_capa(void)
-{
-       /*
-        * No specific port Tx offload capabilities.
-        */
-       return 0;
-}
+       if (trigger && num_rx < nb_pkts)
+               rxq->trigger_seen = trigger;
 
-static uint64_t
-tap_tx_offload_get_queue_capa(void)
-{
-       return DEV_TX_OFFLOAD_MULTI_SEGS |
-              DEV_TX_OFFLOAD_IPV4_CKSUM |
-              DEV_TX_OFFLOAD_UDP_CKSUM |
-              DEV_TX_OFFLOAD_TCP_CKSUM |
-              DEV_TX_OFFLOAD_TCP_TSO;
+       return num_rx;
 }
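
Latching trigger_seen only after a burst that did not fill (num_rx < nb_pkts)
keeps the empty-burst shortcut at the top of pmd_rx_burst() from swallowing
packets: a completely filled burst leaves trigger_seen stale, so the next
call reads the fd again instead of returning 0 early. From the caller's side
the usual drain loop still works (sketch; handle_pkts() is a hypothetical
consumer):

    #define BURST 32
    uint16_t port_id = 0, queue_id = 0;
    struct rte_mbuf *bufs[BURST];
    uint16_t n;

    do {
            n = rte_eth_rx_burst(port_id, queue_id, bufs, BURST);
            handle_pkts(bufs, n);
    } while (n == BURST);       /* full burst: queue may hold more */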
 
 /* Finalize l4 checksum calculation */
@@ -495,26 +544,26 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
 {
        void *l3_hdr = packet + l2_len;
 
-       if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) {
-               struct ipv4_hdr *iph = l3_hdr;
+       if (ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4)) {
+               struct rte_ipv4_hdr *iph = l3_hdr;
                uint16_t cksum;
 
                iph->hdr_checksum = 0;
                cksum = rte_raw_cksum(iph, l3_len);
                iph->hdr_checksum = (cksum == 0xffff) ? cksum : ~cksum;
        }
-       if (ol_flags & PKT_TX_L4_MASK) {
+       if (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
                void *l4_hdr;
 
                l4_hdr = packet + l2_len + l3_len;
-               if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
-                       *l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum;
-               else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM)
-                       *l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum;
+               if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM)
+                       *l4_cksum = &((struct rte_udp_hdr *)l4_hdr)->dgram_cksum;
+               else if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM)
+                       *l4_cksum = &((struct rte_tcp_hdr *)l4_hdr)->cksum;
                else
                        return;
                **l4_cksum = 0;
-               if (ol_flags & PKT_TX_IPV4)
+               if (ol_flags & RTE_MBUF_F_TX_IPV4)
                        *l4_phdr_cksum = rte_ipv4_phdr_cksum(l3_hdr, 0);
                else
                        *l4_phdr_cksum = rte_ipv6_phdr_cksum(l3_hdr, 0);
@@ -522,7 +571,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
        }
 }
 
-static inline void
+static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        struct rte_mbuf **pmbufs,
                        uint16_t *num_packets, unsigned long *num_tx_bytes)
@@ -565,9 +614,9 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        char *buff_data = rte_pktmbuf_mtod(seg, void *);
                        proto = (*buff_data & 0xf0);
                        pi.proto = (proto == 0x40) ?
-                               rte_cpu_to_be_16(ETHER_TYPE_IPv4) :
+                               rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) :
                                ((proto == 0x60) ?
-                                       rte_cpu_to_be_16(ETHER_TYPE_IPv6) :
+                                       rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) :
                                        0x00);
                }
 
@@ -578,9 +627,9 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 
                nb_segs = mbuf->nb_segs;
                if (txq->csum &&
-                   ((mbuf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4) ||
-                    (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM ||
-                    (mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM))) {
+                   ((mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4) ||
+                     (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM ||
+                     (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM))) {
                        is_cksum = 1;
 
                        /* Support only packets with at least layer 4
@@ -589,7 +638,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        seg_len = rte_pktmbuf_data_len(mbuf);
                        l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
                        if (seg_len < l234_hlen)
-                               break;
+                               return -1;
 
                        /* To change checksums, work on a * copy of l2, l3
                         * headers + l4 pseudo header
@@ -635,10 +684,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                /* copy the tx frame data */
                n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
                if (n <= 0)
-                       break;
+                       return -1;
+
                (*num_packets)++;
                (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
        }
+       return 0;
 }
 
 /* Callback to handle sending packets from the tap interface
@@ -657,27 +708,25 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                return 0;
 
        struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS];
-       max_size = *txq->mtu + (ETHER_HDR_LEN + ETHER_CRC_LEN + 4);
+       max_size = *txq->mtu + (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 4);
        for (i = 0; i < nb_pkts; i++) {
                struct rte_mbuf *mbuf_in = bufs[num_tx];
                struct rte_mbuf **mbuf;
                uint16_t num_mbufs = 0;
                uint16_t tso_segsz = 0;
                int ret;
+               int num_tso_mbufs;
                uint16_t hdrs_len;
-               int j;
                uint64_t tso;
 
-               tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG;
+               tso = mbuf_in->ol_flags & RTE_MBUF_F_TX_TCP_SEG;
                if (tso) {
                        struct rte_gso_ctx *gso_ctx = &txq->gso_ctx;
 
-                       assert(gso_ctx != NULL);
-
                        /* TCP segmentation implies TCP checksum offload */
-                       mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM;
+                       mbuf_in->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
 
-                       /* gso size is calculated without ETHER_CRC_LEN */
+                       /* gso size is calculated without RTE_ETHER_CRC_LEN */
                        hdrs_len = mbuf_in->l2_len + mbuf_in->l3_len +
                                        mbuf_in->l4_len;
                        tso_segsz = mbuf_in->tso_segsz + hdrs_len;
@@ -687,43 +736,59 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                break;
                        }
                        gso_ctx->gso_size = tso_segsz;
-                       ret = rte_gso_segment(mbuf_in, /* packet to segment */
+                       /* 'mbuf_in' packet to segment */
+                       num_tso_mbufs = rte_gso_segment(mbuf_in,
                                gso_ctx, /* gso control block */
                                (struct rte_mbuf **)&gso_mbufs, /* out mbufs */
                                RTE_DIM(gso_mbufs)); /* max tso mbufs */
 
                        /* num_tso_mbufs contains the number of newly created mbufs */
-                       if (ret < 0)
+                       if (num_tso_mbufs < 0)
                                break;
 
-                       mbuf = gso_mbufs;
-                       num_mbufs = ret;
+                       if (num_tso_mbufs >= 1) {
+                               mbuf = gso_mbufs;
+                               num_mbufs = num_tso_mbufs;
+                       } else {
+                               /* 0 means it can be transmitted directly
+                                * without gso.
+                                */
+                               mbuf = &mbuf_in;
+                               num_mbufs = 1;
+                       }
                } else {
                        /* stats.errs will be incremented */
                        if (rte_pktmbuf_pkt_len(mbuf_in) > max_size)
                                break;
 
                        /* ret 0 indicates no new mbufs were created */
-                       ret = 0;
+                       num_tso_mbufs = 0;
                        mbuf = &mbuf_in;
                        num_mbufs = 1;
                }
 
-               tap_write_mbufs(txq, num_mbufs, mbuf,
+               ret = tap_write_mbufs(txq, num_mbufs, mbuf,
                                &num_packets, &num_tx_bytes);
+               if (ret == -1) {
+                       txq->stats.errs++;
+                       /* free tso mbufs */
+                       if (num_tso_mbufs > 0)
+                               rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
+                       break;
+               }
                num_tx++;
                /* free original mbuf */
                rte_pktmbuf_free(mbuf_in);
                /* free tso mbufs */
-               for (j = 0; j < ret; j++)
-                       rte_pktmbuf_free(mbuf[j]);
+               if (num_tso_mbufs > 0)
+                       rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
        }
 
        txq->stats.opackets += num_packets;
        txq->stats.errs += nb_pkts - num_tx;
        txq->stats.obytes += num_tx_bytes;
 
-       return num_packets;
+       return num_tx;
 }
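
The reworked loop leans on the rte_gso_segment() return contract, and the
burst now returns num_tx (input mbufs consumed) rather than num_packets
(frames written), which is what rte_eth_tx_burst() callers use to decide
what to free or retry. A sketch of the contract, assuming a DPDK version
where the GSO library does not free the input mbuf (xmit() is a hypothetical
helper; MAX_GSO_MBUFS as in this driver):

    struct rte_mbuf *segs[MAX_GSO_MBUFS];
    int n = rte_gso_segment(pkt, &txq->gso_ctx, segs, RTE_DIM(segs));

    if (n < 0) {
            /* error: caller still owns pkt */
    } else {
            if (n > 0) {
                    xmit(segs, n);
                    rte_pktmbuf_free_bulk(segs, n); /* also on Tx failure */
            } else {
                    xmit(&pkt, 1);  /* 0: fits as-is, no segmentation */
            }
            rte_pktmbuf_free(pkt);  /* input not freed by rte_gso_segment() */
    }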
 
 static const char *
@@ -779,7 +844,7 @@ apply:
        case SIOCSIFMTU:
                break;
        default:
-               RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n",
+               TAP_LOG(WARNING, "%s: ioctl() called with wrong arg",
                        pmd->name);
                return -EINVAL;
        }
@@ -801,7 +866,7 @@ tap_link_set_down(struct rte_eth_dev *dev)
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_UP };
 
-       dev->data->dev_link.link_status = ETH_LINK_DOWN;
+       dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
        return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_ONLY);
 }
 
@@ -811,7 +876,7 @@ tap_link_set_up(struct rte_eth_dev *dev)
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_UP };
 
-       dev->data->dev_link.link_status = ETH_LINK_UP;
+       dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
        return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
 }
 
@@ -838,7 +903,7 @@ tap_dev_start(struct rte_eth_dev *dev)
 
 /* This function gets called when the current port gets stopped.
  */
-static void
+static int
 tap_dev_stop(struct rte_eth_dev *dev)
 {
        int i;
@@ -850,6 +915,8 @@ tap_dev_stop(struct rte_eth_dev *dev)
 
        tap_intr_handle_set(dev, 0);
        tap_link_set_down(dev);
+
+       return 0;
 }
 
 static int
@@ -889,58 +956,59 @@ tap_dev_speed_capa(void)
        uint32_t speed = pmd_link.link_speed;
        uint32_t capa = 0;
 
-       if (speed >= ETH_SPEED_NUM_10M)
-               capa |= ETH_LINK_SPEED_10M;
-       if (speed >= ETH_SPEED_NUM_100M)
-               capa |= ETH_LINK_SPEED_100M;
-       if (speed >= ETH_SPEED_NUM_1G)
-               capa |= ETH_LINK_SPEED_1G;
-       if (speed >= ETH_SPEED_NUM_5G)
-               capa |= ETH_LINK_SPEED_2_5G;
-       if (speed >= ETH_SPEED_NUM_5G)
-               capa |= ETH_LINK_SPEED_5G;
-       if (speed >= ETH_SPEED_NUM_10G)
-               capa |= ETH_LINK_SPEED_10G;
-       if (speed >= ETH_SPEED_NUM_20G)
-               capa |= ETH_LINK_SPEED_20G;
-       if (speed >= ETH_SPEED_NUM_25G)
-               capa |= ETH_LINK_SPEED_25G;
-       if (speed >= ETH_SPEED_NUM_40G)
-               capa |= ETH_LINK_SPEED_40G;
-       if (speed >= ETH_SPEED_NUM_50G)
-               capa |= ETH_LINK_SPEED_50G;
-       if (speed >= ETH_SPEED_NUM_56G)
-               capa |= ETH_LINK_SPEED_56G;
-       if (speed >= ETH_SPEED_NUM_100G)
-               capa |= ETH_LINK_SPEED_100G;
+       if (speed >= RTE_ETH_SPEED_NUM_10M)
+               capa |= RTE_ETH_LINK_SPEED_10M;
+       if (speed >= RTE_ETH_SPEED_NUM_100M)
+               capa |= RTE_ETH_LINK_SPEED_100M;
+       if (speed >= RTE_ETH_SPEED_NUM_1G)
+               capa |= RTE_ETH_LINK_SPEED_1G;
+       if (speed >= RTE_ETH_SPEED_NUM_5G)
+               capa |= RTE_ETH_LINK_SPEED_2_5G;
+       if (speed >= RTE_ETH_SPEED_NUM_5G)
+               capa |= RTE_ETH_LINK_SPEED_5G;
+       if (speed >= RTE_ETH_SPEED_NUM_10G)
+               capa |= RTE_ETH_LINK_SPEED_10G;
+       if (speed >= RTE_ETH_SPEED_NUM_20G)
+               capa |= RTE_ETH_LINK_SPEED_20G;
+       if (speed >= RTE_ETH_SPEED_NUM_25G)
+               capa |= RTE_ETH_LINK_SPEED_25G;
+       if (speed >= RTE_ETH_SPEED_NUM_40G)
+               capa |= RTE_ETH_LINK_SPEED_40G;
+       if (speed >= RTE_ETH_SPEED_NUM_50G)
+               capa |= RTE_ETH_LINK_SPEED_50G;
+       if (speed >= RTE_ETH_SPEED_NUM_56G)
+               capa |= RTE_ETH_LINK_SPEED_56G;
+       if (speed >= RTE_ETH_SPEED_NUM_100G)
+               capa |= RTE_ETH_LINK_SPEED_100G;
 
        return capa;
 }
 
-static void
+static int
 tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct pmd_internals *internals = dev->data->dev_private;
 
        dev_info->if_index = internals->if_index;
        dev_info->max_mac_addrs = 1;
-       dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+       dev_info->max_rx_pktlen = (uint32_t)RTE_ETHER_MAX_VLAN_FRAME_LEN;
        dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES;
        dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES;
        dev_info->min_rx_bufsize = 0;
        dev_info->speed_capa = tap_dev_speed_capa();
-       dev_info->rx_queue_offload_capa = tap_rx_offload_get_queue_capa();
-       dev_info->rx_offload_capa = tap_rx_offload_get_port_capa() |
-                                   dev_info->rx_queue_offload_capa;
-       dev_info->tx_queue_offload_capa = tap_tx_offload_get_queue_capa();
-       dev_info->tx_offload_capa = tap_tx_offload_get_port_capa() |
-                                   dev_info->tx_queue_offload_capa;
+       dev_info->rx_queue_offload_capa = TAP_RX_OFFLOAD;
+       dev_info->rx_offload_capa = dev_info->rx_queue_offload_capa;
+       dev_info->tx_queue_offload_capa = TAP_TX_OFFLOAD;
+       dev_info->tx_offload_capa = dev_info->tx_queue_offload_capa;
        dev_info->hash_key_size = TAP_RSS_HASH_KEY_SIZE;
        /*
         * limitation: TAP supports all of IP, UDP and TCP hash
         * functions together and not in partial combinations
         */
        dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK;
+       dev_info->dev_capa &= ~RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
+
+       return 0;
 }
 
 static int
@@ -970,10 +1038,9 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 
        for (i = 0; i < imax; i++) {
                tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-               tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
                tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
                tx_total += tap_stats->q_opackets[i];
-               tx_err_total += tap_stats->q_errors[i];
+               tx_err_total += pmd->txq[i].stats.errs;
                tx_bytes_total += tap_stats->q_obytes[i];
        }
 
@@ -987,7 +1054,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
        return 0;
 }
 
-static void
+static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
        int i;
@@ -1003,23 +1070,40 @@ tap_stats_reset(struct rte_eth_dev *dev)
                pmd->txq[i].stats.errs = 0;
                pmd->txq[i].stats.obytes = 0;
        }
+
+       return 0;
 }
 
-static void
+static int
 tap_dev_close(struct rte_eth_dev *dev)
 {
        int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *process_private = dev->process_private;
+       struct rx_queue *rxq;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               rte_free(dev->process_private);
+               return 0;
+       }
 
        tap_link_set_down(dev);
-       tap_flow_flush(dev, NULL);
-       tap_flow_implicit_flush(internals, NULL);
+       if (internals->nlsk_fd != -1) {
+               tap_flow_flush(dev, NULL);
+               tap_flow_implicit_flush(internals, NULL);
+               tap_nl_final(internals->nlsk_fd);
+               internals->nlsk_fd = -1;
+       }
 
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
                if (process_private->rxq_fds[i] != -1) {
+                       rxq = &internals->rxq[i];
                        close(process_private->rxq_fds[i]);
                        process_private->rxq_fds[i] = -1;
+                       tap_rxq_pool_free(rxq->pool);
+                       rte_free(rxq->iovecs);
+                       rxq->pool = NULL;
+                       rxq->iovecs = NULL;
                }
                if (process_private->txq_fds[i] != -1) {
                        close(process_private->txq_fds[i]);
@@ -1029,33 +1113,57 @@ tap_dev_close(struct rte_eth_dev *dev)
 
        if (internals->remote_if_index) {
                /* Restore initial remote state */
-               ioctl(internals->ioctl_sock, SIOCSIFFLAGS,
+               int ret = ioctl(internals->ioctl_sock, SIOCSIFFLAGS,
                                &internals->remote_initial_flags);
+               if (ret)
+                       TAP_LOG(ERR, "restore remote state failed: %d", ret);
+
        }
 
+       rte_mempool_free(internals->gso_ctx_mp);
+       internals->gso_ctx_mp = NULL;
+
        if (internals->ka_fd != -1) {
                close(internals->ka_fd);
                internals->ka_fd = -1;
        }
+
+       /* mac_addrs must not be freed alone because part of dev_private */
+       dev->data->mac_addrs = NULL;
+
+       internals = dev->data->dev_private;
+       TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
+               tuntap_types[internals->type], rte_socket_id());
+
+       if (internals->ioctl_sock != -1) {
+               close(internals->ioctl_sock);
+               internals->ioctl_sock = -1;
+       }
+       rte_free(dev->process_private);
+       if (tap_devices_count == 1)
+               rte_mp_action_unregister(TAP_MP_KEY);
+       tap_devices_count--;
        /*
         * Since TUN device has no more opened file descriptors
         * it will be removed from kernel
         */
+
+       return 0;
 }
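
Teardown is consolidated in tap_dev_close() (rte_pmd_tap_remove() at the
bottom of this patch just delegates to it). Two details worth noting: a
secondary process frees only its process-private fd table, since the netlink
socket, queue fds and mempool belong to the primary; and mac_addrs is NULLed
because it aliases dev_private, which rte_eth_dev_release_port() would
otherwise free twice. A condensed sketch of that split, not the full close
path:

    static int dev_close_sketch(struct rte_eth_dev *dev)
    {
            if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                    /* only the per-process fd table lives here */
                    rte_free(dev->process_private);
                    return 0;
            }
            /* primary: flows, netlink socket, queue fds, mempool ... */
            dev->data->mac_addrs = NULL;    /* aliases dev_private */
            return 0;
    }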
 
 static void
-tap_rx_queue_release(void *queue)
+tap_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-       struct rx_queue *rxq = queue;
+       struct rx_queue *rxq = dev->data->rx_queues[qid];
        struct pmd_process_private *process_private;
 
        if (!rxq)
                return;
        process_private = rte_eth_devices[rxq->in_port].process_private;
-       if (process_private->rxq_fds[rxq->queue_id] > 0) {
+       if (process_private->rxq_fds[rxq->queue_id] != -1) {
                close(process_private->rxq_fds[rxq->queue_id]);
                process_private->rxq_fds[rxq->queue_id] = -1;
-               rte_pktmbuf_free(rxq->pool);
+               tap_rxq_pool_free(rxq->pool);
                rte_free(rxq->iovecs);
                rxq->pool = NULL;
                rxq->iovecs = NULL;
@@ -1063,16 +1171,16 @@ tap_rx_queue_release(void *queue)
 }
 
 static void
-tap_tx_queue_release(void *queue)
+tap_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-       struct tx_queue *txq = queue;
+       struct tx_queue *txq = dev->data->tx_queues[qid];
        struct pmd_process_private *process_private;
 
        if (!txq)
                return;
        process_private = rte_eth_devices[txq->out_port].process_private;
 
-       if (process_private->txq_fds[txq->queue_id] > 0) {
+       if (process_private->txq_fds[txq->queue_id] != -1) {
                close(process_private->txq_fds[txq->queue_id]);
                process_private->txq_fds[txq->queue_id] = -1;
        }
@@ -1089,64 +1197,128 @@ tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
                tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, REMOTE_ONLY);
                if (!(ifr.ifr_flags & IFF_UP) ||
                    !(ifr.ifr_flags & IFF_RUNNING)) {
-                       dev_link->link_status = ETH_LINK_DOWN;
+                       dev_link->link_status = RTE_ETH_LINK_DOWN;
                        return 0;
                }
        }
        tap_ioctl(pmd, SIOCGIFFLAGS, &ifr, 0, LOCAL_ONLY);
        dev_link->link_status =
                ((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING) ?
-                ETH_LINK_UP :
-                ETH_LINK_DOWN);
+                RTE_ETH_LINK_UP :
+                RTE_ETH_LINK_DOWN);
        return 0;
 }
 
-static void
+static int
 tap_promisc_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
 
-       dev->data->promiscuous = 1;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->promiscuous = 1;
+               ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
+               if (ret != 0) {
+                       /* Rollback promisc flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_promiscuous_enable() rollback
+                        * dev->data->promiscuous in the case of failure.
+                        */
+                       return ret;
+               }
+       }
+
+       return 0;
 }
 
-static void
+static int
 tap_promisc_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
 
-       dev->data->promiscuous = 0;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC);
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->promiscuous = 0;
+               ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC);
+               if (ret != 0) {
+                       /* Rollback promisc flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_promiscuous_disable() rollback
+                        * dev->data->promiscuous in the case of failure.
+                        */
+                       return ret;
+               }
+       }
+
+       return 0;
 }
 
-static void
+static int
 tap_allmulti_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+       int ret;
 
-       dev->data->all_multicast = 1;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->all_multicast = 1;
+               ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
+               if (ret != 0) {
+                       /* Rollback allmulti flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_allmulticast_enable() rollback
+                        * dev->data->all_multicast in the case of failure.
+                        */
+                       return ret;
+               }
+       }
+
+       return 0;
 }
 
-static void
+static int
 tap_allmulti_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->all_multicast = 0;
+               ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI);
+               if (ret != 0) {
+                       /* Rollback allmulti flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_allmulticast_disable() rollback
+                        * dev->data->all_multicast in the case of failure.
+                        */
+                       return ret;
+               }
+       }
 
-       dev->data->all_multicast = 0;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI);
+       return 0;
 }
 
 static int
@@ -1163,7 +1335,7 @@ tap_mac_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
                return -ENOTSUP;
        }
 
-       if (is_zero_ether_addr(mac_addr)) {
+       if (rte_is_zero_ether_addr(mac_addr)) {
                TAP_LOG(ERR, "%s: can't set an empty MAC address",
                        dev->device->name);
                return -EINVAL;
@@ -1172,23 +1344,24 @@ tap_mac_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
        ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, LOCAL_ONLY);
        if (ret < 0)
                return ret;
-       if (is_same_ether_addr((struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
-                              mac_addr))
+       if (rte_is_same_ether_addr(
+                       (struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
+                       mac_addr))
                return 0;
        /* Check the current MAC address on the remote */
        ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY);
        if (ret < 0)
                return ret;
-       if (!is_same_ether_addr(
+       if (!rte_is_same_ether_addr(
                        (struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data,
                        mac_addr))
                mode = LOCAL_AND_REMOTE;
        ifr.ifr_hwaddr.sa_family = AF_LOCAL;
-       rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN);
+       rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, RTE_ETHER_ADDR_LEN);
        ret = tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 1, mode);
        if (ret < 0)
                return ret;
-       rte_memcpy(&pmd->eth_addr, mac_addr, ETHER_ADDR_LEN);
+       rte_memcpy(&pmd->eth_addr, mac_addr, RTE_ETHER_ADDR_LEN);
        if (pmd->remote_if_index && !pmd->flow_isolate) {
                /* Replace MAC redirection rule after a MAC change */
                ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_LOCAL_MAC);
@@ -1215,33 +1388,40 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev)
 {
        uint32_t gso_types;
        char pool_name[64];
-
-       /*
-        * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE bytes
-        * size per mbuf use this pool for both direct and indirect mbufs
-        */
-
-       struct rte_mempool *mp;      /* Mempool for GSO packets */
+       struct pmd_internals *pmd = dev->data->dev_private;
+       int ret;
 
        /* initialize GSO context */
-       gso_types = DEV_TX_OFFLOAD_TCP_TSO;
-       snprintf(pool_name, sizeof(pool_name), "mp_%s", dev->device->name);
-       mp = rte_mempool_lookup((const char *)pool_name);
-       if (!mp) {
-               mp = rte_pktmbuf_pool_create(pool_name, TAP_GSO_MBUFS_NUM,
-                       TAP_GSO_MBUF_CACHE_SIZE, 0,
+       gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO;
+       if (!pmd->gso_ctx_mp) {
+               /*
+                * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE
+                * bytes size per mbuf use this pool for both direct and
+                * indirect mbufs
+                */
+               ret = snprintf(pool_name, sizeof(pool_name), "mp_%s",
+                               dev->device->name);
+               if (ret < 0 || ret >= (int)sizeof(pool_name)) {
+                       TAP_LOG(ERR,
+                               "%s: failed to create mbuf pool name for device %s,"
+                               "device name too long or output error, ret: %d\n",
+                               pmd->name, dev->device->name, ret);
+                       return -ENAMETOOLONG;
+               }
+               pmd->gso_ctx_mp = rte_pktmbuf_pool_create(pool_name,
+                       TAP_GSO_MBUFS_NUM, TAP_GSO_MBUF_CACHE_SIZE, 0,
                        RTE_PKTMBUF_HEADROOM + TAP_GSO_MBUF_SEG_SIZE,
                        SOCKET_ID_ANY);
-               if (!mp) {
-                       struct pmd_internals *pmd = dev->data->dev_private;
-                       RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n",
+               if (!pmd->gso_ctx_mp) {
+                       TAP_LOG(ERR,
+                               "%s: failed to create mbuf pool for device %s\n",
                                pmd->name, dev->device->name);
                        return -1;
                }
        }
 
-       gso_ctx->direct_pool = mp;
-       gso_ctx->indirect_pool = mp;
+       gso_ctx->direct_pool = pmd->gso_ctx_mp;
+       gso_ctx->indirect_pool = pmd->gso_ctx_mp;
        gso_ctx->gso_types = gso_types;
        gso_ctx->gso_size = 0; /* gso_size is set in tx_burst() per packet */
        gso_ctx->flag = 0;
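
The snprintf() check above guards the mempool name, and the pool is now
cached in pmd->gso_ctx_mp so tap_dev_close() can free it. snprintf() returns
the length that would have been written, so both an output error (< 0) and
truncation (>= buffer size) must be rejected before the name is used to
create or look up the pool. The idiom in isolation (sketch):

    #include <stdio.h>
    #include <errno.h>

    static int make_pool_name(char *buf, size_t len, const char *dev_name)
    {
            int ret = snprintf(buf, len, "mp_%s", dev_name);

            if (ret < 0 || ret >= (int)len)
                    return -ENAMETOOLONG;   /* would not identify the pool */
            return 0;
    }
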
@@ -1398,7 +1578,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        return 0;
 
 error:
-       rte_pktmbuf_free(rxq->pool);
+       tap_rxq_pool_free(rxq->pool);
        rxq->pool = NULL;
        rte_free(rxq->iovecs);
        rxq->iovecs = NULL;
@@ -1427,9 +1607,9 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
 
        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
        txq->csum = !!(offloads &
-                       (DEV_TX_OFFLOAD_IPV4_CKSUM |
-                        DEV_TX_OFFLOAD_UDP_CKSUM |
-                        DEV_TX_OFFLOAD_TCP_CKSUM));
+                       (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+                        RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+                        RTE_ETH_TX_OFFLOAD_TCP_CKSUM));
 
        ret = tap_setup_queue(dev, internals, tx_queue_id, 0);
        if (ret == -1)
@@ -1448,13 +1628,8 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_mtu = mtu };
-       int err = 0;
 
-       err = tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
-       if (!err)
-               dev->data->mtu = mtu;
-
-       return err;
+       return tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE);
 }
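
tap_mtu_set() no longer mirrors the value into dev->data->mtu: with the
current ethdev API the library stores it once the driver op succeeds.
Roughly, on the library side (a paraphrase of rte_eth_dev_set_mtu()
behavior, not the exact source):

    ret = dev->dev_ops->mtu_set(dev, mtu);
    if (ret == 0)
            dev->data->mtu = mtu;   /* done by ethdev, not the PMD */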
 
 static int
@@ -1489,33 +1664,50 @@ tap_dev_intr_handler(void *cb_arg)
        struct rte_eth_dev *dev = cb_arg;
        struct pmd_internals *pmd = dev->data->dev_private;
 
-       tap_nl_recv(pmd->intr_handle.fd, tap_nl_msg_handler, dev);
+       tap_nl_recv(rte_intr_fd_get(pmd->intr_handle),
+                   tap_nl_msg_handler, dev);
 }
 
 static int
 tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       int ret;
 
        /* In any case, disable interrupt if the conf is no longer there. */
        if (!dev->data->dev_conf.intr_conf.lsc) {
-               if (pmd->intr_handle.fd != -1) {
-                       tap_nl_final(pmd->intr_handle.fd);
-                       rte_intr_callback_unregister(&pmd->intr_handle,
-                               tap_dev_intr_handler, dev);
-               }
+               if (rte_intr_fd_get(pmd->intr_handle) != -1)
+                       goto clean;
+
                return 0;
        }
        if (set) {
-               pmd->intr_handle.fd = tap_nl_init(RTMGRP_LINK);
-               if (unlikely(pmd->intr_handle.fd == -1))
+               rte_intr_fd_set(pmd->intr_handle, tap_nl_init(RTMGRP_LINK));
+               if (unlikely(rte_intr_fd_get(pmd->intr_handle) == -1))
                        return -EBADF;
                return rte_intr_callback_register(
-                       &pmd->intr_handle, tap_dev_intr_handler, dev);
+                       pmd->intr_handle, tap_dev_intr_handler, dev);
        }
-       tap_nl_final(pmd->intr_handle.fd);
-       return rte_intr_callback_unregister(&pmd->intr_handle,
-                                           tap_dev_intr_handler, dev);
+
+clean:
+       do {
+               ret = rte_intr_callback_unregister(pmd->intr_handle,
+                       tap_dev_intr_handler, dev);
+               if (ret >= 0) {
+                       break;
+               } else if (ret == -EAGAIN) {
+                       rte_delay_ms(100);
+               } else {
+                       TAP_LOG(ERR, "intr callback unregister failed: %d",
+                                    ret);
+                       break;
+               }
+       } while (true);
+
+       tap_nl_final(rte_intr_fd_get(pmd->intr_handle));
+       rte_intr_fd_set(pmd->intr_handle, -1);
+
+       return 0;
 }
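
This hunk follows the DPDK 21.11 change that made struct rte_intr_handle
opaque: instances are allocated and freed through the rte_intr_* API, and
unregistering retries on -EAGAIN because the callback may still be executing
(the <rte_cycles.h> include added at the top of this patch is for
rte_delay_ms() here). A sketch of that lifecycle; my_fd, handler_cb and arg
are placeholders:

    #include <rte_interrupts.h>
    #include <rte_cycles.h>

    static struct rte_intr_handle *
    intr_setup(int my_fd, rte_intr_callback_fn handler_cb, void *arg)
    {
            struct rte_intr_handle *ih;

            ih = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
            if (ih == NULL)
                    return NULL;
            rte_intr_type_set(ih, RTE_INTR_HANDLE_EXT);
            rte_intr_fd_set(ih, my_fd);
            if (rte_intr_callback_register(ih, handler_cb, arg) < 0) {
                    rte_intr_instance_free(ih);
                    return NULL;
            }
            return ih;
    }

    static void
    intr_teardown(struct rte_intr_handle *ih, rte_intr_callback_fn cb,
                  void *arg)
    {
            /* -EAGAIN: the callback is still running somewhere */
            while (rte_intr_callback_unregister(ih, cb, arg) == -EAGAIN)
                    rte_delay_ms(100);
            rte_intr_instance_free(ih);
    }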
 
 static int
@@ -1524,8 +1716,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set)
        int err;
 
        err = tap_lsc_intr_handle_set(dev, set);
-       if (err)
+       if (err < 0) {
+               if (!set)
+                       tap_rx_intr_vec_set(dev, 0);
                return err;
+       }
        err = tap_rx_intr_vec_set(dev, set);
        if (err && set)
                tap_lsc_intr_handle_set(dev, 0);
@@ -1567,7 +1762,7 @@ static int
 tap_flow_ctrl_get(struct rte_eth_dev *dev __rte_unused,
                  struct rte_eth_fc_conf *fc_conf)
 {
-       fc_conf->mode = RTE_FC_NONE;
+       fc_conf->mode = RTE_ETH_FC_NONE;
        return 0;
 }
 
@@ -1575,7 +1770,7 @@ static int
 tap_flow_ctrl_set(struct rte_eth_dev *dev __rte_unused,
                  struct rte_eth_fc_conf *fc_conf)
 {
-       if (fc_conf->mode != RTE_FC_NONE)
+       if (fc_conf->mode != RTE_ETH_FC_NONE)
                return -ENOTSUP;
        return 0;
 }
@@ -1674,11 +1869,7 @@ static const struct eth_dev_ops ops = {
        .stats_reset            = tap_stats_reset,
        .dev_supported_ptypes_get = tap_dev_supported_ptypes_get,
        .rss_hash_update        = tap_rss_hash_update,
-       .filter_ctrl            = tap_dev_filter_ctrl,
-};
-
-static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = {
-       "UNKNOWN", "TUN", "TAP"
+       .flow_ops_get           = tap_dev_flow_ops_get,
 };
 
 static int
@@ -1717,6 +1908,9 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
        pmd->dev = dev;
        strlcpy(pmd->name, tap_name, sizeof(pmd->name));
        pmd->type = type;
+       pmd->ka_fd = -1;
+       pmd->nlsk_fd = -1;
+       pmd->gso_ctx_mp = NULL;
 
        pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
        if (pmd->ioctl_sock == -1) {
@@ -1726,10 +1920,18 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
                goto error_exit;
        }
 
+       /* Allocate interrupt instance */
+       pmd->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+       if (pmd->intr_handle == NULL) {
+               TAP_LOG(ERR, "Failed to allocate intr handle");
+               goto error_exit;
+       }
+
        /* Setup some default values */
        data = dev->data;
        data->dev_private = pmd;
-       data->dev_flags = RTE_ETH_DEV_INTR_LSC;
+       data->dev_flags = RTE_ETH_DEV_INTR_LSC |
+                               RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
        data->numa_node = numa_node;
 
        data->dev_link = pmd_link;
@@ -1742,20 +1944,19 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
        dev->rx_pkt_burst = pmd_rx_burst;
        dev->tx_pkt_burst = pmd_tx_burst;
 
-       pmd->intr_handle.type = RTE_INTR_HANDLE_EXT;
-       pmd->intr_handle.fd = -1;
-       dev->intr_handle = &pmd->intr_handle;
+       rte_intr_type_set(pmd->intr_handle, RTE_INTR_HANDLE_EXT);
+       rte_intr_fd_set(pmd->intr_handle, -1);
+       dev->intr_handle = pmd->intr_handle;
 
        /* Presetup the fds to -1 as being not valid */
-       pmd->ka_fd = -1;
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
                process_private->rxq_fds[i] = -1;
                process_private->txq_fds[i] = -1;
        }
 
        if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
-               if (is_zero_ether_addr(mac_addr))
-                       eth_random_addr((uint8_t *)&pmd->eth_addr);
+               if (rte_is_zero_ether_addr(mac_addr))
+                       rte_eth_random_addr((uint8_t *)&pmd->eth_addr);
                else
                        rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(*mac_addr));
        }
@@ -1781,7 +1982,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
                memset(&ifr, 0, sizeof(struct ifreq));
                ifr.ifr_hwaddr.sa_family = AF_LOCAL;
                rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr,
-                               ETHER_ADDR_LEN);
+                               RTE_ETHER_ADDR_LEN);
                if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0)
                        goto error_exit;
        }
@@ -1836,7 +2037,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
                        goto error_remote;
                }
                rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data,
-                          ETHER_ADDR_LEN);
+                          RTE_ETHER_ADDR_LEN);
                /* The desired MAC is already in ifreq after SIOCGIFHWADDR. */
                if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) {
                        TAP_LOG(ERR, "%s: failed to get %s MAC address.",
@@ -1887,11 +2088,16 @@ error_remote:
        tap_flow_implicit_flush(pmd, NULL);
 
 error_exit:
-       if (pmd->ioctl_sock > 0)
+       if (pmd->nlsk_fd != -1)
+               close(pmd->nlsk_fd);
+       if (pmd->ka_fd != -1)
+               close(pmd->ka_fd);
+       if (pmd->ioctl_sock != -1)
                close(pmd->ioctl_sock);
        /* mac_addrs must not be freed alone because part of dev_private */
        dev->data->mac_addrs = NULL;
        rte_eth_dev_release_port(dev);
+       rte_intr_instance_free(pmd->intr_handle);
 
 error_exit_nodev:
        TAP_LOG(ERR, "%s Unable to initialize %s",
@@ -1995,8 +2201,10 @@ set_mac_type(const char *key __rte_unused,
                static int iface_idx;
 
                /* fixed mac = 00:64:74:61:70:<iface_idx> */
-               memcpy((char *)user_mac->addr_bytes, "\0dtap", ETHER_ADDR_LEN);
-               user_mac->addr_bytes[ETHER_ADDR_LEN - 1] = iface_idx++ + '0';
+               memcpy((char *)user_mac->addr_bytes, "\0dtap",
+                       RTE_ETHER_ADDR_LEN);
+               user_mac->addr_bytes[RTE_ETHER_ADDR_LEN - 1] =
+                       iface_idx++ + '0';
                goto success;
        }
 
@@ -2064,7 +2272,7 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
                        }
                }
        }
-       pmd_link.link_speed = ETH_SPEED_NUM_10G;
+       pmd_link.link_speed = RTE_ETH_SPEED_NUM_10G;
 
        TAP_LOG(DEBUG, "Initializing pmd_tun for %s", name);
 
@@ -2238,7 +2446,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
                return 0;
        }
 
-       speed = ETH_SPEED_NUM_10G;
+       speed = RTE_ETH_SPEED_NUM_10G;
 
        /* use tap%d which causes kernel to choose next available */
        strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
@@ -2284,7 +2492,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
        /* Register IPC feed callback */
        if (!tap_devices_count) {
                ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
-               if (ret < 0) {
+               if (ret < 0 && rte_errno != ENOTSUP) {
                        TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
                                strerror(rte_errno));
                        goto leave;
@@ -2316,54 +2524,15 @@ static int
 rte_pmd_tap_remove(struct rte_vdev_device *dev)
 {
        struct rte_eth_dev *eth_dev = NULL;
-       struct pmd_internals *internals;
-       struct pmd_process_private *process_private;
-       int i;
 
        /* find the ethdev entry */
        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (!eth_dev)
-               return -ENODEV;
-
-       /* mac_addrs must not be freed alone because part of dev_private */
-       eth_dev->data->mac_addrs = NULL;
-
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return rte_eth_dev_release_port(eth_dev);
-
-       internals = eth_dev->data->dev_private;
-       process_private = eth_dev->process_private;
-
-       TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
-               tuntap_types[internals->type], rte_socket_id());
-
-       if (internals->nlsk_fd) {
-               tap_flow_flush(eth_dev, NULL);
-               tap_flow_implicit_flush(internals, NULL);
-               tap_nl_final(internals->nlsk_fd);
-       }
-       for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               if (process_private->rxq_fds[i] != -1) {
-                       close(process_private->rxq_fds[i]);
-                       process_private->rxq_fds[i] = -1;
-               }
-               if (process_private->txq_fds[i] != -1) {
-                       close(process_private->txq_fds[i]);
-                       process_private->txq_fds[i] = -1;
-               }
-       }
+               return 0;
 
-       close(internals->ioctl_sock);
-       rte_free(eth_dev->process_private);
-       if (tap_devices_count == 1)
-               rte_mp_action_unregister(TAP_MP_KEY);
-       tap_devices_count--;
+       tap_dev_close(eth_dev);
        rte_eth_dev_release_port(eth_dev);
 
-       if (internals->ka_fd != -1) {
-               close(internals->ka_fd);
-               internals->ka_fd = -1;
-       }
        return 0;
 }
 
@@ -2386,11 +2555,4 @@ RTE_PMD_REGISTER_PARAM_STRING(net_tap,
                              ETH_TAP_IFACE_ARG "=<string> "
                              ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " "
                              ETH_TAP_REMOTE_ARG "=<string>");
-int tap_logtype;
-
-RTE_INIT(tap_init_log)
-{
-       tap_logtype = rte_log_register("pmd.net.tap");
-       if (tap_logtype >= 0)
-               rte_log_set_level(tap_logtype, RTE_LOG_NOTICE);
-}
+RTE_LOG_REGISTER_DEFAULT(tap_logtype, NOTICE);