X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ftap%2Frte_eth_tap.c;h=c515de3bf71d92b51189a283a429edb428032b21;hb=902fa8b50d609150f717394ba0c5b72890c66d9b;hp=2846ce0d3e5c0c61756b6ba48431ee8c1128ce58;hpb=cc6cf04f59ec20757990b3e8586b7c3a7f497720;p=dpdk.git diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c index 2846ce0d3e..c515de3bf7 100644 --- a/drivers/net/tap/rte_eth_tap.c +++ b/drivers/net/tap/rte_eth_tap.c @@ -7,8 +7,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -69,8 +70,23 @@ #define TAP_IOV_DEFAULT_MAX 1024 +#define TAP_RX_OFFLOAD (DEV_RX_OFFLOAD_SCATTER | \ + DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM) + +#define TAP_TX_OFFLOAD (DEV_TX_OFFLOAD_MULTI_SEGS | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO) + static int tap_devices_count; +static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = { + "UNKNOWN", "TUN", "TAP" +}; + static const char *valid_arguments[] = { ETH_TAP_IFACE_ARG, ETH_TAP_REMOTE_ARG, @@ -133,7 +149,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) #ifdef IFF_MULTI_QUEUE unsigned int features; #endif - int fd; + int fd, signo, flags; memset(&ifr, 0, sizeof(struct ifreq)); @@ -198,52 +214,87 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) } } + flags = fcntl(fd, F_GETFL); + if (flags == -1) { + TAP_LOG(WARNING, + "Unable to get %s current flags\n", + ifr.ifr_name); + goto error; + } + /* Always set the file descriptor to non-blocking */ - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { + flags |= O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { TAP_LOG(WARNING, "Unable to set %s to nonblocking: %s", ifr.ifr_name, strerror(errno)); goto error; } - /* Set up trigger to optimize empty Rx bursts */ - errno = 0; - do { + /* Find a free realtime signal */ + for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) { struct sigaction sa; - int flags = fcntl(fd, F_GETFL); - if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1) + if (sigaction(signo, NULL, &sa) == -1) { + TAP_LOG(WARNING, + "Unable to get current rt-signal %d handler", + signo); + goto error; + } + + /* Already have the handler we want on this signal */ + if (sa.sa_handler == tap_trigger_cb) break; - if (sa.sa_handler != tap_trigger_cb) { - /* - * Make sure SIGIO is not already taken. This is done - * as late as possible to leave the application a - * chance to set up its own signal handler first. - */ - if (sa.sa_handler != SIG_IGN && - sa.sa_handler != SIG_DFL) { - errno = EBUSY; - break; - } - sa = (struct sigaction){ - .sa_flags = SA_RESTART, - .sa_handler = tap_trigger_cb, - }; - if (sigaction(SIGIO, &sa, NULL) == -1) - break; + + /* Is handler in use by application */ + if (sa.sa_handler != SIG_DFL) { + TAP_LOG(DEBUG, + "Skipping used rt-signal %d", signo); + continue; } - /* Enable SIGIO on file descriptor */ - fcntl(fd, F_SETFL, flags | O_ASYNC); - fcntl(fd, F_SETOWN, getpid()); - } while (0); - if (errno) { + sa = (struct sigaction) { + .sa_flags = SA_RESTART, + .sa_handler = tap_trigger_cb, + }; + + if (sigaction(signo, &sa, NULL) == -1) { + TAP_LOG(WARNING, + "Unable to set rt-signal %d handler\n", signo); + goto error; + } + + /* Found a good signal to use */ + TAP_LOG(DEBUG, + "Using rt-signal %d", signo); + break; + } + + if (signo == SIGRTMAX) { + TAP_LOG(WARNING, "All rt-signals are in use\n"); + /* Disable trigger globally in case of error */ tap_trigger = 0; - TAP_LOG(WARNING, "Rx trigger disabled: %s", - strerror(errno)); - } + TAP_LOG(NOTICE, "No Rx trigger signal available\n"); + } else { + /* Enable signal on file descriptor */ + if (fcntl(fd, F_SETSIG, signo) < 0) { + TAP_LOG(WARNING, "Unable to set signo %d for fd %d: %s", + signo, fd, strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETFL, flags | O_ASYNC) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl flags: %s", + strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETOWN, getpid()) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl owner: %s", + strerror(errno)); + goto error; + } + } return fd; error: @@ -263,6 +314,7 @@ tap_verify_csum(struct rte_mbuf *mbuf) uint16_t cksum = 0; void *l3_hdr; void *l4_hdr; + struct rte_udp_hdr *udp_hdr; if (l2 == RTE_PTYPE_L2_ETHER_VLAN) l2_len += 4; @@ -276,8 +328,7 @@ tap_verify_csum(struct rte_mbuf *mbuf) if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) { struct rte_ipv4_hdr *iph = l3_hdr; - /* ihl contains the number of 4-byte words in the header */ - l3_len = 4 * (iph->version_ihl & 0xf); + l3_len = rte_ipv4_hdr_len(iph); if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf))) return; /* check that the total length reported by header is not @@ -302,42 +353,44 @@ tap_verify_csum(struct rte_mbuf *mbuf) rte_pktmbuf_data_len(mbuf)) return; } else { - /* IPv6 extensions are not supported */ + /* - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN cannot happen because + * mbuf->packet_type is filled by rte_net_get_ptype() which + * never returns this value. + * - IPv6 extensions are not supported. + */ return; } if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) { + int cksum_ok; + l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len); /* Don't verify checksum for multi-segment packets. */ if (mbuf->nb_segs > 1) return; - if (l3 == RTE_PTYPE_L3_IPV4) - cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); - else if (l3 == RTE_PTYPE_L3_IPV6) - cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); - mbuf->ol_flags |= cksum ? - PKT_RX_L4_CKSUM_BAD : - PKT_RX_L4_CKSUM_GOOD; + if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) { + if (l4 == RTE_PTYPE_L4_UDP) { + udp_hdr = (struct rte_udp_hdr *)l4_hdr; + if (udp_hdr->dgram_cksum == 0) { + /* + * For IPv4, a zero UDP checksum + * indicates that the sender did not + * generate one [RFC 768]. + */ + mbuf->ol_flags |= PKT_RX_L4_CKSUM_NONE; + return; + } + } + cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, + l4_hdr); + } else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */ + cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr, + l4_hdr); + } + mbuf->ol_flags |= cksum_ok ? + PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD; } } -static uint64_t -tap_rx_offload_get_port_capa(void) -{ - /* - * No specific port Rx offload capabilities. - */ - return 0; -} - -static uint64_t -tap_rx_offload_get_queue_capa(void) -{ - return DEV_RX_OFFLOAD_SCATTER | - DEV_RX_OFFLOAD_IPV4_CKSUM | - DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_CKSUM; -} - static void tap_rxq_pool_free(struct rte_mbuf *pool) { @@ -453,25 +506,6 @@ end: return num_rx; } -static uint64_t -tap_tx_offload_get_port_capa(void) -{ - /* - * No specific port Tx offload capabilities. - */ - return 0; -} - -static uint64_t -tap_tx_offload_get_queue_capa(void) -{ - return DEV_TX_OFFLOAD_MULTI_SEGS | - DEV_TX_OFFLOAD_IPV4_CKSUM | - DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM | - DEV_TX_OFFLOAD_TCP_TSO; -} - /* Finalize l4 checksum calculation */ static void tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum, @@ -712,8 +746,16 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) if (num_tso_mbufs < 0) break; - mbuf = gso_mbufs; - num_mbufs = num_tso_mbufs; + if (num_tso_mbufs >= 1) { + mbuf = gso_mbufs; + num_mbufs = num_tso_mbufs; + } else { + /* 0 means it can be transmitted directly + * without gso. + */ + mbuf = &mbuf_in; + num_mbufs = 1; + } } else { /* stats.errs will be incremented */ if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) @@ -861,7 +903,7 @@ tap_dev_start(struct rte_eth_dev *dev) /* This function gets called when the current port gets stopped. */ -static void +static int tap_dev_stop(struct rte_eth_dev *dev) { int i; @@ -873,6 +915,8 @@ tap_dev_stop(struct rte_eth_dev *dev) tap_intr_handle_set(dev, 0); tap_link_set_down(dev); + + return 0; } static int @@ -952,12 +996,10 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES; dev_info->min_rx_bufsize = 0; dev_info->speed_capa = tap_dev_speed_capa(); - dev_info->rx_queue_offload_capa = tap_rx_offload_get_queue_capa(); - dev_info->rx_offload_capa = tap_rx_offload_get_port_capa() | - dev_info->rx_queue_offload_capa; - dev_info->tx_queue_offload_capa = tap_tx_offload_get_queue_capa(); - dev_info->tx_offload_capa = tap_tx_offload_get_port_capa() | - dev_info->tx_queue_offload_capa; + dev_info->rx_queue_offload_capa = TAP_RX_OFFLOAD; + dev_info->rx_offload_capa = dev_info->rx_queue_offload_capa; + dev_info->tx_queue_offload_capa = TAP_TX_OFFLOAD; + dev_info->tx_offload_capa = dev_info->tx_queue_offload_capa; dev_info->hash_key_size = TAP_RSS_HASH_KEY_SIZE; /* * limitation: TAP supports all of IP, UDP and TCP hash @@ -1031,7 +1073,7 @@ tap_stats_reset(struct rte_eth_dev *dev) return 0; } -static void +static int tap_dev_close(struct rte_eth_dev *dev) { int i; @@ -1039,6 +1081,11 @@ tap_dev_close(struct rte_eth_dev *dev) struct pmd_process_private *process_private = dev->process_private; struct rx_queue *rxq; + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(dev->process_private); + return 0; + } + tap_link_set_down(dev); if (internals->nlsk_fd != -1) { tap_flow_flush(dev, NULL); @@ -1065,18 +1112,42 @@ tap_dev_close(struct rte_eth_dev *dev) if (internals->remote_if_index) { /* Restore initial remote state */ - ioctl(internals->ioctl_sock, SIOCSIFFLAGS, + int ret = ioctl(internals->ioctl_sock, SIOCSIFFLAGS, &internals->remote_initial_flags); + if (ret) + TAP_LOG(ERR, "restore remote state failed: %d", ret); + } + rte_mempool_free(internals->gso_ctx_mp); + internals->gso_ctx_mp = NULL; + if (internals->ka_fd != -1) { close(internals->ka_fd); internals->ka_fd = -1; } + + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; + + internals = dev->data->dev_private; + TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", + tuntap_types[internals->type], rte_socket_id()); + + if (internals->ioctl_sock != -1) { + close(internals->ioctl_sock); + internals->ioctl_sock = -1; + } + rte_free(dev->process_private); + if (tap_devices_count == 1) + rte_mp_action_unregister(TAP_MP_KEY); + tap_devices_count--; /* * Since TUN device has no more opened file descriptors * it will be removed from kernel */ + + return 0; } static void @@ -1088,7 +1159,7 @@ tap_rx_queue_release(void *queue) if (!rxq) return; process_private = rte_eth_devices[rxq->in_port].process_private; - if (process_private->rxq_fds[rxq->queue_id] > 0) { + if (process_private->rxq_fds[rxq->queue_id] != -1) { close(process_private->rxq_fds[rxq->queue_id]); process_private->rxq_fds[rxq->queue_id] = -1; tap_rxq_pool_free(rxq->pool); @@ -1108,7 +1179,7 @@ tap_tx_queue_release(void *queue) return; process_private = rte_eth_devices[txq->out_port].process_private; - if (process_private->txq_fds[txq->queue_id] > 0) { + if (process_private->txq_fds[txq->queue_id] != -1) { close(process_private->txq_fds[txq->queue_id]); process_private->txq_fds[txq->queue_id] = -1; } @@ -1316,26 +1387,31 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) { uint32_t gso_types; char pool_name[64]; - - /* - * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE bytes - * size per mbuf use this pool for both direct and indirect mbufs - */ - - struct rte_mempool *mp; /* Mempool for GSO packets */ + struct pmd_internals *pmd = dev->data->dev_private; + int ret; /* initialize GSO context */ gso_types = DEV_TX_OFFLOAD_TCP_TSO; - snprintf(pool_name, sizeof(pool_name), "mp_%s", dev->device->name); - mp = rte_mempool_lookup((const char *)pool_name); - if (!mp) { - mp = rte_pktmbuf_pool_create(pool_name, TAP_GSO_MBUFS_NUM, - TAP_GSO_MBUF_CACHE_SIZE, 0, + if (!pmd->gso_ctx_mp) { + /* + * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE + * bytes size per mbuf use this pool for both direct and + * indirect mbufs + */ + ret = snprintf(pool_name, sizeof(pool_name), "mp_%s", + dev->device->name); + if (ret < 0 || ret >= (int)sizeof(pool_name)) { + TAP_LOG(ERR, + "%s: failed to create mbuf pool name for device %s," + "device name too long or output error, ret: %d\n", + pmd->name, dev->device->name, ret); + return -ENAMETOOLONG; + } + pmd->gso_ctx_mp = rte_pktmbuf_pool_create(pool_name, + TAP_GSO_MBUFS_NUM, TAP_GSO_MBUF_CACHE_SIZE, 0, RTE_PKTMBUF_HEADROOM + TAP_GSO_MBUF_SEG_SIZE, SOCKET_ID_ANY); - if (!mp) { - struct pmd_internals *pmd = dev->data->dev_private; - + if (!pmd->gso_ctx_mp) { TAP_LOG(ERR, "%s: failed to create mbuf pool for device %s\n", pmd->name, dev->device->name); @@ -1343,8 +1419,8 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) } } - gso_ctx->direct_pool = mp; - gso_ctx->indirect_pool = mp; + gso_ctx->direct_pool = pmd->gso_ctx_mp; + gso_ctx->indirect_pool = pmd->gso_ctx_mp; gso_ctx->gso_types = gso_types; gso_ctx->gso_size = 0; /* gso_size is set in tx_burst() per packet */ gso_ctx->flag = 0; @@ -1599,13 +1675,12 @@ static int tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) { struct pmd_internals *pmd = dev->data->dev_private; + int ret; /* In any case, disable interrupt if the conf is no longer there. */ if (!dev->data->dev_conf.intr_conf.lsc) { if (pmd->intr_handle.fd != -1) { - tap_nl_final(pmd->intr_handle.fd); - rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + goto clean; } return 0; } @@ -1616,9 +1691,26 @@ tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) return rte_intr_callback_register( &pmd->intr_handle, tap_dev_intr_handler, dev); } + +clean: + do { + ret = rte_intr_callback_unregister(&pmd->intr_handle, + tap_dev_intr_handler, dev); + if (ret >= 0) { + break; + } else if (ret == -EAGAIN) { + rte_delay_ms(100); + } else { + TAP_LOG(ERR, "intr callback unregister failed: %d", + ret); + break; + } + } while (true); + tap_nl_final(pmd->intr_handle.fd); - return rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + pmd->intr_handle.fd = -1; + + return 0; } static int @@ -1780,11 +1872,7 @@ static const struct eth_dev_ops ops = { .stats_reset = tap_stats_reset, .dev_supported_ptypes_get = tap_dev_supported_ptypes_get, .rss_hash_update = tap_rss_hash_update, - .filter_ctrl = tap_dev_filter_ctrl, -}; - -static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = { - "UNKNOWN", "TUN", "TAP" + .flow_ops_get = tap_dev_flow_ops_get, }; static int @@ -1823,6 +1911,9 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, pmd->dev = dev; strlcpy(pmd->name, tap_name, sizeof(pmd->name)); pmd->type = type; + pmd->ka_fd = -1; + pmd->nlsk_fd = -1; + pmd->gso_ctx_mp = NULL; pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); if (pmd->ioctl_sock == -1) { @@ -1835,7 +1926,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, /* Setup some default values */ data = dev->data; data->dev_private = pmd; - data->dev_flags = RTE_ETH_DEV_INTR_LSC; + data->dev_flags = RTE_ETH_DEV_INTR_LSC | + RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; data->numa_node = numa_node; data->dev_link = pmd_link; @@ -1853,7 +1945,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, dev->intr_handle = &pmd->intr_handle; /* Presetup the fds to -1 as being not valid */ - pmd->ka_fd = -1; for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { process_private->rxq_fds[i] = -1; process_private->txq_fds[i] = -1; @@ -1993,7 +2084,11 @@ error_remote: tap_flow_implicit_flush(pmd, NULL); error_exit: - if (pmd->ioctl_sock > 0) + if (pmd->nlsk_fd != -1) + close(pmd->nlsk_fd); + if (pmd->ka_fd != -1) + close(pmd->ka_fd); + if (pmd->ioctl_sock != -1) close(pmd->ioctl_sock); /* mac_addrs must not be freed alone because part of dev_private */ dev->data->mac_addrs = NULL; @@ -2424,36 +2519,15 @@ static int rte_pmd_tap_remove(struct rte_vdev_device *dev) { struct rte_eth_dev *eth_dev = NULL; - struct pmd_internals *internals; /* find the ethdev entry */ eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); if (!eth_dev) - return -ENODEV; - - /* mac_addrs must not be freed alone because part of dev_private */ - eth_dev->data->mac_addrs = NULL; - - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return rte_eth_dev_release_port(eth_dev); + return 0; tap_dev_close(eth_dev); - - internals = eth_dev->data->dev_private; - TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", - tuntap_types[internals->type], rte_socket_id()); - - close(internals->ioctl_sock); - rte_free(eth_dev->process_private); - if (tap_devices_count == 1) - rte_mp_action_unregister(TAP_MP_KEY); - tap_devices_count--; rte_eth_dev_release_port(eth_dev); - if (internals->ka_fd != -1) { - close(internals->ka_fd); - internals->ka_fd = -1; - } return 0; } @@ -2476,11 +2550,4 @@ RTE_PMD_REGISTER_PARAM_STRING(net_tap, ETH_TAP_IFACE_ARG "= " ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " " ETH_TAP_REMOTE_ARG "="); -int tap_logtype; - -RTE_INIT(tap_init_log) -{ - tap_logtype = rte_log_register("pmd.net.tap"); - if (tap_logtype >= 0) - rte_log_set_level(tap_logtype, RTE_LOG_NOTICE); -} +RTE_LOG_REGISTER_DEFAULT(tap_logtype, NOTICE);