net/tap: add speed capabilities
[dpdk.git] / drivers / net / tap / rte_eth_tap.c
index efc4426..c531feb 100644 (file)
@@ -31,6 +31,8 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rte_atomic.h>
+#include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
 #include <unistd.h>
-#include <poll.h>
 #include <arpa/inet.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
@@ -73,6 +77,8 @@ static const char *valid_arguments[] = {
 
 static int tap_unit;
 
+static volatile uint32_t tap_trigger;  /* Rx trigger, bumped on SIGIO; 0 = off */
+
 static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -90,6 +96,7 @@ struct pkt_stats {
 
 struct rx_queue {
        struct rte_mempool *mp;         /* Mempool for RX packets */
+       uint32_t trigger_seen;          /* Last seen Rx trigger value */
        uint16_t in_port;               /* Port ID */
        int fd;
 
@@ -107,11 +114,23 @@ struct pmd_internals {
        struct ether_addr eth_addr;     /* Mac address of the device port */
 
        int if_index;                   /* IF_INDEX for the port */
+       int ioctl_sock;                 /* socket for ioctl calls */
 
        struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];    /* List of RX queues */
        struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];    /* List of TX queues */
 };
 
+/*
+ * SIGIO handler: advance the global Rx trigger so that Rx queues comparing
+ * it against their last seen value notice a change.
+ */
+static void
+tap_trigger_cb(int sig __rte_unused)
+{
+       uint32_t next = tap_trigger + 1;
+
+       /* Bit 31 is forced so the stored value can never be zero */
+       tap_trigger = next | 0x80000000;
+}
+
+static int
+tap_ioctl(struct pmd_internals *pmd, unsigned long request,
+         struct ifreq *ifr, int set); /* forward decl, used by tun_alloc() */
+
 /* Tun/Tap allocation routine
  *
  * name is the number of the interface to use, unless NULL to take the host
@@ -129,7 +148,7 @@ tun_alloc(struct pmd_internals *pmd, uint16_t qid)
        memset(&ifr, 0, sizeof(struct ifreq));
 
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-       strncpy(ifr.ifr_name, pmd->name, IFNAMSIZ);
+       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
 
        RTE_LOG(DEBUG, PMD, "ifr_name '%s'\n", ifr.ifr_name);
 
@@ -176,14 +195,50 @@ tun_alloc(struct pmd_internals *pmd, uint16_t qid)
                goto error;
        }
 
-       if (qid == 0) {
-               if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
-                       RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
-                               ifr.ifr_name);
-                       goto error;
+       /* Set up trigger to optimize empty Rx bursts */
+       errno = 0;
+       do {
+               struct sigaction sa;
+               int flags = fcntl(fd, F_GETFL);
+
+               if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1)
+                       break;
+               if (sa.sa_handler != tap_trigger_cb) {
+                       /*
+                        * Make sure SIGIO is not already taken. This is done
+                        * as late as possible to leave the application a
+                        * chance to set up its own signal handler first.
+                        */
+                       if (sa.sa_handler != SIG_IGN &&
+                           sa.sa_handler != SIG_DFL) {
+                               errno = EBUSY;
+                               break;
+                       }
+                       sa = (struct sigaction){
+                               .sa_flags = SA_RESTART,
+                               .sa_handler = tap_trigger_cb,
+                       };
+                       if (sigaction(SIGIO, &sa, NULL) == -1)
+                               break;
                }
+               /* Enable SIGIO on file descriptor */
+               fcntl(fd, F_SETFL, flags | O_ASYNC);
+               fcntl(fd, F_SETOWN, getpid());
+       } while (0);
+       if (errno) {
+               /* Disable trigger globally in case of error */
+               tap_trigger = 0;
+               RTE_LOG(WARNING, PMD, "Rx trigger disabled: %s\n",
+                       strerror(errno));
+       }
 
-               rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+       if (qid == 0) {
+               struct ifreq ifr;
+
+               if (tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0) < 0)
+                       goto error;
+               rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data,
+                          ETHER_ADDR_LEN);
        }
 
        return fd;
@@ -205,7 +260,13 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        struct rx_queue *rxq = queue;
        uint16_t num_rx;
        unsigned long num_rx_bytes = 0;
+       uint32_t trigger = tap_trigger;
 
+       if (trigger == rxq->trigger_seen)
+               return 0;
+       if (trigger)
+               rxq->trigger_seen = trigger;
+       rte_compiler_barrier();
        for (num_rx = 0; num_rx < nb_pkts; ) {
                /* allocate the next mbuf */
                mbuf = rte_pktmbuf_alloc(rxq->mp);
@@ -242,7 +303,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        struct rte_mbuf *mbuf;
        struct tx_queue *txq = queue;
-       struct pollfd pfd;
        uint16_t num_tx = 0;
        unsigned long num_tx_bytes = 0;
        int i, n;
@@ -250,26 +310,18 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        if (unlikely(nb_pkts == 0))
                return 0;
 
-       pfd.events = POLLOUT;
-       pfd.fd = txq->fd;
        for (i = 0; i < nb_pkts; i++) {
-               n = poll(&pfd, 1, 0);
-
+               /* copy the tx frame data */
+               mbuf = bufs[num_tx];
+               n = write(txq->fd,
+                         rte_pktmbuf_mtod(mbuf, void *),
+                         rte_pktmbuf_pkt_len(mbuf));
                if (n <= 0)
                        break;
 
-               if (pfd.revents & POLLOUT) {
-                       /* copy the tx frame data */
-                       mbuf = bufs[num_tx];
-                       n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
-                                 rte_pktmbuf_pkt_len(mbuf));
-                       if (n <= 0)
-                               break;
-
-                       num_tx++;
-                       num_tx_bytes += mbuf->pkt_len;
-                       rte_pktmbuf_free(mbuf);
-               }
+               num_tx++;
+               num_tx_bytes += mbuf->pkt_len;
+               rte_pktmbuf_free(mbuf);
        }
 
        txq->stats.opackets += num_tx;
@@ -280,63 +332,59 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 }
 
+/*
+ * Issue a netdevice ioctl on the tap interface through the PMD's control
+ * socket (pmd->ioctl_sock).
+ *
+ * ifr->ifr_name is always overwritten with pmd->name. For SIOCSIFFLAGS,
+ * ifr->ifr_flags holds the flag bits to change: current flags are fetched
+ * first, then the requested bits are ORed in when set is nonzero or cleared
+ * when it is zero. SIOCGIFHWADDR, SIOCSIFHWADDR and SIOCSIFMTU are passed
+ * through unchanged and ignore set. Any other request is rejected.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported request, or the negated
+ * errno of the failing ioctl().
+ */
 static int
-tap_link_set_flags(struct pmd_internals *pmd, short flags, int add)
+tap_ioctl(struct pmd_internals *pmd, unsigned long request,
+         struct ifreq *ifr, int set)
 {
-       struct ifreq ifr;
-       int err, s;
-
-       /*
-        * An AF_INET/DGRAM socket is needed for
-        * SIOCGIFFLAGS/SIOCSIFFLAGS, using fd won't work.
-        */
-       s = socket(AF_INET, SOCK_DGRAM, 0);
-       if (s < 0) {
-               RTE_LOG(ERR, PMD,
-                       "Unable to get a socket to set flags: %s\n",
-                       strerror(errno));
-               return -1;
-       }
-       memset(&ifr, 0, sizeof(ifr));
-       strncpy(ifr.ifr_name, pmd->name, IFNAMSIZ);
-       err = ioctl(s, SIOCGIFFLAGS, &ifr);
-       if (err < 0) {
-               RTE_LOG(WARNING, PMD, "Unable to get %s device flags: %s\n",
-                       pmd->name, strerror(errno));
-               close(s);
-               return -1;
-       }
-       if (add)
-               ifr.ifr_flags |= flags;
-       else
-               ifr.ifr_flags &= ~flags;
-       err = ioctl(s, SIOCSIFFLAGS, &ifr);
-       if (err < 0) {
-               RTE_LOG(WARNING, PMD, "Unable to %s flags 0x%x: %s\n",
-                       add ? "set" : "unset", flags, strerror(errno));
-               close(s);
-               return -1;
-       }
-       close(s);
+       short req_flags;
+       int err;
 
+       snprintf(ifr->ifr_name, IFNAMSIZ, "%s", pmd->name);
+       switch (request) {
+       case SIOCSIFFLAGS:
+               /*
+                * Latch the requested bits here only: other requests may
+                * come with an ifreq whose flags were never initialized,
+                * and SIOCGIFFLAGS below overwrites ifr->ifr_flags.
+                */
+               req_flags = ifr->ifr_flags;
+               /* fetch current flags to leave other flags untouched */
+               if (ioctl(pmd->ioctl_sock, SIOCGIFFLAGS, ifr) < 0)
+                       goto error;
+               if (set)
+                       ifr->ifr_flags |= req_flags;
+               else
+                       ifr->ifr_flags &= ~req_flags;
+               break;
+       case SIOCGIFHWADDR:
+       case SIOCSIFHWADDR:
+       case SIOCSIFMTU:
+               break;
+       default:
+               RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n",
+                       pmd->name);
+               return -EINVAL;
+       }
+       if (ioctl(pmd->ioctl_sock, request, ifr) < 0)
+               goto error;
        return 0;
+
+error:
+       /* Logging may modify errno: latch it before reporting */
+       err = errno;
+       RTE_LOG(ERR, PMD, "%s: ioctl(%lu) failed with error: %s\n",
+               ifr->ifr_name, request, strerror(err));
+       return -err;
 }
 
 static int
 tap_link_set_down(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_UP }; /* flag to clear */
 
        dev->data->dev_link.link_status = ETH_LINK_DOWN;
-       return tap_link_set_flags(pmd, IFF_UP | IFF_NOARP, 0);
+       return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0); /* set=0: clear IFF_UP */
 }
 
 static int
 tap_link_set_up(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_UP }; /* flag to set */
 
        dev->data->dev_link.link_status = ETH_LINK_UP;
-       return tap_link_set_flags(pmd, IFF_UP | IFF_NOARP, 1);
+       return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1); /* set=1: raise IFF_UP */
 }
 
 static int
@@ -359,6 +407,40 @@ tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
        return 0;
 }
 
+/*
+ * Build the speed capability bitmap reported in dev_info->speed_capa:
+ * every ETH_LINK_SPEED_* bit whose numeric speed does not exceed
+ * pmd_link.link_speed is set.
+ */
+static uint32_t
+tap_dev_speed_capa(void)
+{
+       uint32_t speed = pmd_link.link_speed;
+       uint32_t capa = 0;
+
+       if (speed >= ETH_SPEED_NUM_10M)
+               capa |= ETH_LINK_SPEED_10M;
+       if (speed >= ETH_SPEED_NUM_100M)
+               capa |= ETH_LINK_SPEED_100M;
+       if (speed >= ETH_SPEED_NUM_1G)
+               capa |= ETH_LINK_SPEED_1G;
+       /* 2.5G has its own threshold; it must not be gated on 5G */
+       if (speed >= ETH_SPEED_NUM_2_5G)
+               capa |= ETH_LINK_SPEED_2_5G;
+       if (speed >= ETH_SPEED_NUM_5G)
+               capa |= ETH_LINK_SPEED_5G;
+       if (speed >= ETH_SPEED_NUM_10G)
+               capa |= ETH_LINK_SPEED_10G;
+       if (speed >= ETH_SPEED_NUM_20G)
+               capa |= ETH_LINK_SPEED_20G;
+       if (speed >= ETH_SPEED_NUM_25G)
+               capa |= ETH_LINK_SPEED_25G;
+       if (speed >= ETH_SPEED_NUM_40G)
+               capa |= ETH_LINK_SPEED_40G;
+       if (speed >= ETH_SPEED_NUM_50G)
+               capa |= ETH_LINK_SPEED_50G;
+       if (speed >= ETH_SPEED_NUM_56G)
+               capa |= ETH_LINK_SPEED_56G;
+       if (speed >= ETH_SPEED_NUM_100G)
+               capa |= ETH_LINK_SPEED_100G;
+
+       return capa;
+}
+
 static void
 tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -371,6 +453,7 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_tx_queues = internals->nb_queues;
        dev_info->min_rx_bufsize = 0;
        dev_info->pci_dev = NULL;
+       dev_info->speed_capa = tap_dev_speed_capa();
 }
 
 static void
@@ -470,36 +553,60 @@ static void
 tap_promisc_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_PROMISC };
 
        dev->data->promiscuous = 1;
-       tap_link_set_flags(pmd, IFF_PROMISC, 1);
+       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1);
 }
 
 static void
 tap_promisc_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; /* flag to clear */
 
        dev->data->promiscuous = 0;
-       tap_link_set_flags(pmd, IFF_PROMISC, 0);
+       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0); /* set=0; result is ignored */
 }
 
 static void
 tap_allmulti_enable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; /* flag to set */
 
        dev->data->all_multicast = 1;
-       tap_link_set_flags(pmd, IFF_ALLMULTI, 1);
+       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1); /* set=1; result is ignored */
 }
 
 static void
 tap_allmulti_disable(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; /* flag to clear */
 
        dev->data->all_multicast = 0;
-       tap_link_set_flags(pmd, IFF_ALLMULTI, 0);
+       tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0); /* set=0; result is ignored */
+}
+
+
+/*
+ * Program a new MAC address on the tap interface with SIOCSIFHWADDR.
+ * An all-zero address is rejected with an error log. The address cached
+ * in pmd->eth_addr is refreshed only when the ioctl succeeds.
+ */
+static void
+tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+       struct ifreq req;
+
+       if (is_zero_ether_addr(mac_addr)) {
+               RTE_LOG(ERR, PMD, "%s: can't set an empty MAC address\n",
+                       dev->data->name);
+               return;
+       }
+
+       req.ifr_hwaddr.sa_family = AF_LOCAL;
+       rte_memcpy(req.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN);
+       /* Keep the cached copy untouched when the request fails */
+       if (tap_ioctl(internals, SIOCSIFHWADDR, &req, 1) >= 0)
+               rte_memcpy(&internals->eth_addr, mac_addr, ETHER_ADDR_LEN);
 }
 
 static int
@@ -524,6 +631,15 @@ tap_setup_queue(struct rte_eth_dev *dev,
                                        pmd->name, qid);
                                return -1;
                        }
+                       if (qid == 0) {
+                               struct ifreq ifr;
+
+                               ifr.ifr_mtu = dev->data->mtu;
+                               if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1) < 0) {
+                                       close(fd);
+                                       return -1;
+                               }
+                       }
                }
        }
 
@@ -573,6 +689,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        internals->rxq[rx_queue_id].mp = mp;
+       internals->rxq[rx_queue_id].trigger_seen = 1; /* force initial burst */
        internals->rxq[rx_queue_id].in_port = dev->data->port_id;
 
        /* Now get the space available for data in the mbuf */
@@ -619,6 +736,20 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
        return 0;
 }
 
+/*
+ * Apply a new MTU to the tap interface with SIOCSIFMTU. dev->data->mtu is
+ * updated only when the ioctl succeeds; tap_ioctl()'s result is returned
+ * as is.
+ */
+static int
+tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+       struct ifreq req = { .ifr_mtu = mtu };
+       int ret = tap_ioctl(internals, SIOCSIFMTU, &req, 1);
+
+       if (ret == 0)
+               dev->data->mtu = mtu;
+       return ret;
+}
+
 static const struct eth_dev_ops ops = {
        .dev_start              = tap_dev_start,
        .dev_stop               = tap_dev_stop,
@@ -636,6 +767,8 @@ static const struct eth_dev_ops ops = {
        .promiscuous_disable    = tap_promisc_disable,
        .allmulticast_enable    = tap_allmulti_enable,
        .allmulticast_disable   = tap_allmulti_disable,
+       .mac_addr_set           = tap_mac_set,
+       .mtu_set                = tap_mtu_set,
        .stats_get              = tap_stats_get,
        .stats_reset            = tap_stats_reset,
 };
@@ -663,7 +796,9 @@ eth_dev_tap_create(const char *name, char *tap_name)
                goto error_exit;
        }
 
-       dev = rte_eth_dev_allocate(tap_name);
+       /* name in allocation and data->name must be consistent */
+       snprintf(data->name, sizeof(data->name), "%s", name);
+       dev = rte_eth_dev_allocate(name);
        if (!dev) {
                RTE_LOG(ERR, PMD, "TAP Unable to allocate device struct\n");
                goto error_exit;
@@ -673,9 +808,18 @@ eth_dev_tap_create(const char *name, char *tap_name)
 
        pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
 
+       pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (pmd->ioctl_sock == -1) {
+               RTE_LOG(ERR, PMD,
+                       "TAP Unable to get a socket for management: %s\n",
+                       strerror(errno));
+               goto error_exit;
+       }
+
        /* Setup some default values */
        data->dev_private = pmd;
        data->port_id = dev->data->port_id;
+       data->mtu = dev->data->mtu;
        data->dev_flags = RTE_ETH_DEV_DETACHABLE;
        data->kdrv = RTE_KDRV_NONE;
        data->drv_name = pmd_tap_drv.driver.name;
@@ -691,7 +835,6 @@ eth_dev_tap_create(const char *name, char *tap_name)
        dev->driver = NULL;
        dev->rx_pkt_burst = pmd_rx_burst;
        dev->tx_pkt_burst = pmd_tx_burst;
-       snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
 
        /* Presetup the fds to -1 as being not valid */
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
@@ -816,6 +959,7 @@ rte_pmd_tap_remove(const char *name)
                if (internals->rxq[i].fd != -1)
                        close(internals->rxq[i].fd);
 
+       close(internals->ioctl_sock);
        rte_free(eth_dev->data->dev_private);
        rte_free(eth_dev->data);