net/tap: fix fd leak on creation failure
[dpdk.git] / drivers / net / tap / rte_eth_tap.c
index 896aae5..fea464c 100644 (file)
@@ -19,7 +19,6 @@
 #include <rte_ethdev.h>
 #include <rte_errno.h>
 
-#include <assert.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -71,8 +70,6 @@
 #define TAP_IOV_DEFAULT_MAX 1024
 
 static int tap_devices_count;
-static struct rte_vdev_driver pmd_tap_drv;
-static struct rte_vdev_driver pmd_tun_drv;
 
 static const char *valid_arguments[] = {
        ETH_TAP_IFACE_ARG,
@@ -341,6 +338,23 @@ tap_rx_offload_get_queue_capa(void)
               DEV_RX_OFFLOAD_TCP_CKSUM;
 }
 
+static void
+tap_rxq_pool_free(struct rte_mbuf *pool)
+{
+       struct rte_mbuf *mbuf = pool;
+       uint16_t nb_segs = 1;
+
+       if (mbuf == NULL)
+               return;
+
+       while (mbuf->next) {
+               mbuf = mbuf->next;
+               nb_segs++;
+       }
+       pool->nb_segs = nb_segs;
+       rte_pktmbuf_free(pool);
+}
+
 /* Callback to handle the rx burst of packets to the correct interface and
  * file descriptor(s) in a multi-queue setup.
  */
@@ -355,10 +369,8 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
        if (trigger == rxq->trigger_seen)
                return 0;
-       if (trigger)
-               rxq->trigger_seen = trigger;
+
        process_private = rte_eth_devices[rxq->in_port].process_private;
-       rte_compiler_barrier();
        for (num_rx = 0; num_rx < nb_pkts; ) {
                struct rte_mbuf *mbuf = rxq->pool;
                struct rte_mbuf *seg = NULL;
@@ -393,7 +405,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                        goto end;
 
                                seg->next = NULL;
-                               rte_pktmbuf_free(mbuf);
+                               tap_rxq_pool_free(mbuf);
 
                                goto end;
                        }
@@ -435,6 +447,9 @@ end:
        rxq->stats.ipackets += num_rx;
        rxq->stats.ibytes += num_rx_bytes;
 
+       if (trigger && num_rx < nb_pkts)
+               rxq->trigger_seen = trigger;
+
        return num_rx;
 }
 
@@ -522,7 +537,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len,
        }
 }
 
-static inline void
+static inline int
 tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        struct rte_mbuf **pmbufs,
                        uint16_t *num_packets, unsigned long *num_tx_bytes)
@@ -565,9 +580,9 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        char *buff_data = rte_pktmbuf_mtod(seg, void *);
                        proto = (*buff_data & 0xf0);
                        pi.proto = (proto == 0x40) ?
-                               rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4) :
+                               rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) :
                                ((proto == 0x60) ?
-                                       rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6) :
+                                       rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) :
                                        0x00);
                }
 
@@ -589,7 +604,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        seg_len = rte_pktmbuf_data_len(mbuf);
                        l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
                        if (seg_len < l234_hlen)
-                               break;
+                               return -1;
 
                        /* To change checksums, work on a * copy of l2, l3
                         * headers + l4 pseudo header
@@ -635,10 +650,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                /* copy the tx frame data */
                n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
                if (n <= 0)
-                       break;
+                       return -1;
+
                (*num_packets)++;
                (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf);
        }
+       return 0;
 }
 
 /* Callback to handle sending packets from the tap interface
@@ -664,16 +681,14 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                uint16_t num_mbufs = 0;
                uint16_t tso_segsz = 0;
                int ret;
+               int num_tso_mbufs;
                uint16_t hdrs_len;
-               int j;
                uint64_t tso;
 
                tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG;
                if (tso) {
                        struct rte_gso_ctx *gso_ctx = &txq->gso_ctx;
 
-                       assert(gso_ctx != NULL);
-
                        /* TCP segmentation implies TCP checksum offload */
                        mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM;
 
@@ -687,43 +702,51 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                break;
                        }
                        gso_ctx->gso_size = tso_segsz;
-                       ret = rte_gso_segment(mbuf_in, /* packet to segment */
+                       /* 'mbuf_in' packet to segment */
+                       num_tso_mbufs = rte_gso_segment(mbuf_in,
                                gso_ctx, /* gso control block */
                                (struct rte_mbuf **)&gso_mbufs, /* out mbufs */
                                RTE_DIM(gso_mbufs)); /* max tso mbufs */
 
-                       /* ret contains the number of new created mbufs */
+                       /* num_tso_mbufs is the number of newly created mbufs */
-                       if (ret < 0)
+                       if (num_tso_mbufs < 0)
                                break;
 
                        mbuf = gso_mbufs;
-                       num_mbufs = ret;
+                       num_mbufs = num_tso_mbufs;
                } else {
                        /* stats.errs will be incremented */
                        if (rte_pktmbuf_pkt_len(mbuf_in) > max_size)
                                break;
 
-                       /* ret 0 indicates no new mbufs were created */
+                       /* num_tso_mbufs == 0 indicates no new mbufs were created */
-                       ret = 0;
+                       num_tso_mbufs = 0;
                        mbuf = &mbuf_in;
                        num_mbufs = 1;
                }
 
-               tap_write_mbufs(txq, num_mbufs, mbuf,
+               ret = tap_write_mbufs(txq, num_mbufs, mbuf,
                                &num_packets, &num_tx_bytes);
+               if (ret == -1) {
+                       txq->stats.errs++;
+                       /* free tso mbufs */
+                       if (num_tso_mbufs > 0)
+                               rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
+                       break;
+               }
                num_tx++;
                /* free original mbuf */
                rte_pktmbuf_free(mbuf_in);
                /* free tso mbufs */
-               for (j = 0; j < ret; j++)
-                       rte_pktmbuf_free(mbuf[j]);
+               if (num_tso_mbufs > 0)
+                       rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs);
        }
 
        txq->stats.opackets += num_packets;
        txq->stats.errs += nb_pkts - num_tx;
        txq->stats.obytes += num_tx_bytes;
 
-       return num_packets;
+       return num_tx;
 }
 
 static const char *
@@ -779,7 +802,7 @@ apply:
        case SIOCSIFMTU:
                break;
        default:
-               RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n",
+               TAP_LOG(WARNING, "%s: ioctl() called with wrong arg",
                        pmd->name);
                return -EINVAL;
        }
@@ -917,7 +940,7 @@ tap_dev_speed_capa(void)
        return capa;
 }
 
-static void
+static int
 tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
        struct pmd_internals *internals = dev->data->dev_private;
@@ -941,6 +964,8 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
         * functions together and not in partial combinations
         */
        dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK;
+
+       return 0;
 }
 
 static int
@@ -970,10 +995,9 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
 
        for (i = 0; i < imax; i++) {
                tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
-               tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
                tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
                tx_total += tap_stats->q_opackets[i];
-               tx_err_total += tap_stats->q_errors[i];
+               tx_err_total += pmd->txq[i].stats.errs;
                tx_bytes_total += tap_stats->q_obytes[i];
        }
 
@@ -987,7 +1011,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
        return 0;
 }
 
-static void
+static int
 tap_stats_reset(struct rte_eth_dev *dev)
 {
        int i;
@@ -1003,6 +1027,8 @@ tap_stats_reset(struct rte_eth_dev *dev)
                pmd->txq[i].stats.errs = 0;
                pmd->txq[i].stats.obytes = 0;
        }
+
+       return 0;
 }
 
 static void
@@ -1011,15 +1037,25 @@ tap_dev_close(struct rte_eth_dev *dev)
        int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *process_private = dev->process_private;
+       struct rx_queue *rxq;
 
        tap_link_set_down(dev);
-       tap_flow_flush(dev, NULL);
-       tap_flow_implicit_flush(internals, NULL);
+       if (internals->nlsk_fd != -1) {
+               tap_flow_flush(dev, NULL);
+               tap_flow_implicit_flush(internals, NULL);
+               tap_nl_final(internals->nlsk_fd);
+               internals->nlsk_fd = -1;
+       }
 
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
                if (process_private->rxq_fds[i] != -1) {
+                       rxq = &internals->rxq[i];
                        close(process_private->rxq_fds[i]);
                        process_private->rxq_fds[i] = -1;
+                       tap_rxq_pool_free(rxq->pool);
+                       rte_free(rxq->iovecs);
+                       rxq->pool = NULL;
+                       rxq->iovecs = NULL;
                }
                if (process_private->txq_fds[i] != -1) {
                        close(process_private->txq_fds[i]);
@@ -1055,7 +1091,7 @@ tap_rx_queue_release(void *queue)
        if (process_private->rxq_fds[rxq->queue_id] > 0) {
                close(process_private->rxq_fds[rxq->queue_id]);
                process_private->rxq_fds[rxq->queue_id] = -1;
-               rte_pktmbuf_free(rxq->pool);
+               tap_rxq_pool_free(rxq->pool);
                rte_free(rxq->iovecs);
                rxq->pool = NULL;
                rxq->iovecs = NULL;
@@ -1101,52 +1137,116 @@ tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
        return 0;
 }
 
-static void
+static int
 tap_promisc_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->promiscuous = 1;
+               ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
+               if (ret != 0) {
+                       /* Rollback promisc flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_promiscuous_enable() rollback
+                        * dev->data->promiscuous in the case of failure.
+                        */
+                       return ret;
+               }
+       }
 
-       dev->data->promiscuous = 1;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
+       return 0;
 }
 
-static void
+static int
 tap_promisc_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->promiscuous = 0;
+               ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC);
+               if (ret != 0) {
+                       /* Rollback promisc flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_promiscuous_disable() rollback
+                        * dev->data->promiscuous in the case of failure.
+                        */
+                       return ret;
+               }
+       }
 
-       dev->data->promiscuous = 0;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC);
+       return 0;
 }
 
-static void
+static int
 tap_allmulti_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->all_multicast = 1;
+               ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
+               if (ret != 0) {
+                       /* Rollback allmulti flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_allmulticast_enable() rollback
+                        * dev->data->all_multicast in the case of failure.
+                        */
+                       return ret;
+               }
+       }
 
-       dev->data->all_multicast = 1;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
+       return 0;
 }
 
-static void
+static int
 tap_allmulti_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
        struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI };
+       int ret;
+
+       ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
+       if (ret != 0)
+               return ret;
+
+       if (pmd->remote_if_index && !pmd->flow_isolate) {
+               dev->data->all_multicast = 0;
+               ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI);
+               if (ret != 0) {
+                       /* Rollback allmulti flag */
+                       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE);
+                       /*
+                        * rte_eth_dev_allmulticast_disable() rollback
+                        * dev->data->all_multicast in the case of failure.
+                        */
+                       return ret;
+               }
+       }
 
-       dev->data->all_multicast = 0;
-       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE);
-       if (pmd->remote_if_index && !pmd->flow_isolate)
-               tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI);
+       return 0;
 }
 
 static int
@@ -1235,7 +1335,9 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev)
                        SOCKET_ID_ANY);
                if (!mp) {
                        struct pmd_internals *pmd = dev->data->dev_private;
-                       RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n",
+
+                       TAP_LOG(ERR,
+                               "%s: failed to create mbuf pool for device %s",
                                pmd->name, dev->device->name);
                        return -1;
                }
@@ -1399,7 +1501,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        return 0;
 
 error:
-       rte_pktmbuf_free(rxq->pool);
+       tap_rxq_pool_free(rxq->pool);
        rxq->pool = NULL;
        rte_free(rxq->iovecs);
        rxq->iovecs = NULL;
@@ -1525,8 +1627,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set)
        int err;
 
        err = tap_lsc_intr_handle_set(dev, set);
-       if (err)
+       if (err < 0) {
+               if (!set)
+                       tap_rx_intr_vec_set(dev, 0);
                return err;
+       }
        err = tap_rx_intr_vec_set(dev, set);
        if (err && set)
                tap_lsc_intr_handle_set(dev, 0);
@@ -1718,6 +1823,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
        pmd->dev = dev;
        strlcpy(pmd->name, tap_name, sizeof(pmd->name));
        pmd->type = type;
+       pmd->ka_fd = -1;
+       pmd->nlsk_fd = -1;
 
        pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
        if (pmd->ioctl_sock == -1) {
@@ -1748,7 +1855,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
        dev->intr_handle = &pmd->intr_handle;
 
        /* Presetup the fds to -1 as being not valid */
-       pmd->ka_fd = -1;
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
                process_private->rxq_fds[i] = -1;
                process_private->txq_fds[i] = -1;
@@ -1888,7 +1994,11 @@ error_remote:
        tap_flow_implicit_flush(pmd, NULL);
 
 error_exit:
-       if (pmd->ioctl_sock > 0)
+       if (pmd->nlsk_fd != -1)
+               close(pmd->nlsk_fd);
+       if (pmd->ka_fd != -1)
+               close(pmd->ka_fd);
+       if (pmd->ioctl_sock != -1)
                close(pmd->ioctl_sock);
        /* mac_addrs must not be freed alone because part of dev_private */
        dev->data->mac_addrs = NULL;
@@ -2287,7 +2397,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
        /* Register IPC feed callback */
        if (!tap_devices_count) {
                ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
-               if (ret < 0) {
+               if (ret < 0 && rte_errno != ENOTSUP) {
                        TAP_LOG(ERR, "tap: Failed to register IPC callback: %s",
                                strerror(rte_errno));
                        goto leave;
@@ -2320,8 +2430,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
 {
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internals;
-       struct pmd_process_private *process_private;
-       int i;
 
        /* find the ethdev entry */
        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
@@ -2334,28 +2442,12 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return rte_eth_dev_release_port(eth_dev);
 
-       internals = eth_dev->data->dev_private;
-       process_private = eth_dev->process_private;
+       tap_dev_close(eth_dev);
 
+       internals = eth_dev->data->dev_private;
        TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
                tuntap_types[internals->type], rte_socket_id());
 
-       if (internals->nlsk_fd) {
-               tap_flow_flush(eth_dev, NULL);
-               tap_flow_implicit_flush(internals, NULL);
-               tap_nl_final(internals->nlsk_fd);
-       }
-       for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               if (process_private->rxq_fds[i] != -1) {
-                       close(process_private->rxq_fds[i]);
-                       process_private->rxq_fds[i] = -1;
-               }
-               if (process_private->txq_fds[i] != -1) {
-                       close(process_private->txq_fds[i]);
-                       process_private->txq_fds[i] = -1;
-               }
-       }
-
        close(internals->ioctl_sock);
        rte_free(eth_dev->process_private);
        if (tap_devices_count == 1)
@@ -2363,10 +2455,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
        tap_devices_count--;
        rte_eth_dev_release_port(eth_dev);
 
-       if (internals->ka_fd != -1) {
-               close(internals->ka_fd);
-               internals->ka_fd = -1;
-       }
        return 0;
 }