X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ftap%2Frte_eth_tap.c;h=b19e26ba0e65e45cd0362d91215dea11e0cd42aa;hb=25ae7f1a5d9d127a46f8d62d1d689f77a78138fd;hp=64bd049110e17ee97232987c85307ec85a4d7c54;hpb=0310fd25b4a9c3ebb604b1447c32149b085a548a;p=dpdk.git diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c index 64bd049110..b19e26ba0e 100644 --- a/drivers/net/tap/rte_eth_tap.c +++ b/drivers/net/tap/rte_eth_tap.c @@ -18,8 +18,8 @@ #include #include #include +#include -#include #include #include #include @@ -134,7 +134,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) #ifdef IFF_MULTI_QUEUE unsigned int features; #endif - int fd; + int fd, signo, flags; memset(&ifr, 0, sizeof(struct ifreq)); @@ -199,52 +199,87 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) } } + flags = fcntl(fd, F_GETFL); + if (flags == -1) { + TAP_LOG(WARNING, + "Unable to get %s current flags\n", + ifr.ifr_name); + goto error; + } + /* Always set the file descriptor to non-blocking */ - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { + flags |= O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { TAP_LOG(WARNING, "Unable to set %s to nonblocking: %s", ifr.ifr_name, strerror(errno)); goto error; } - /* Set up trigger to optimize empty Rx bursts */ - errno = 0; - do { + /* Find a free realtime signal */ + for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) { struct sigaction sa; - int flags = fcntl(fd, F_GETFL); - if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1) + if (sigaction(signo, NULL, &sa) == -1) { + TAP_LOG(WARNING, + "Unable to get current rt-signal %d handler", + signo); + goto error; + } + + /* Already have the handler we want on this signal */ + if (sa.sa_handler == tap_trigger_cb) break; - if (sa.sa_handler != tap_trigger_cb) { - /* - * Make sure SIGIO is not already taken. This is done - * as late as possible to leave the application a - * chance to set up its own signal handler first. - */ - if (sa.sa_handler != SIG_IGN && - sa.sa_handler != SIG_DFL) { - errno = EBUSY; - break; - } - sa = (struct sigaction){ - .sa_flags = SA_RESTART, - .sa_handler = tap_trigger_cb, - }; - if (sigaction(SIGIO, &sa, NULL) == -1) - break; + + /* Is handler in use by application */ + if (sa.sa_handler != SIG_DFL) { + TAP_LOG(DEBUG, + "Skipping used rt-signal %d", signo); + continue; } - /* Enable SIGIO on file descriptor */ - fcntl(fd, F_SETFL, flags | O_ASYNC); - fcntl(fd, F_SETOWN, getpid()); - } while (0); - if (errno) { + sa = (struct sigaction) { + .sa_flags = SA_RESTART, + .sa_handler = tap_trigger_cb, + }; + + if (sigaction(signo, &sa, NULL) == -1) { + TAP_LOG(WARNING, + "Unable to set rt-signal %d handler\n", signo); + goto error; + } + + /* Found a good signal to use */ + TAP_LOG(DEBUG, + "Using rt-signal %d", signo); + break; + } + + if (signo == SIGRTMAX) { + TAP_LOG(WARNING, "All rt-signals are in use\n"); + /* Disable trigger globally in case of error */ tap_trigger = 0; - TAP_LOG(WARNING, "Rx trigger disabled: %s", - strerror(errno)); - } + TAP_LOG(NOTICE, "No Rx trigger signal available\n"); + } else { + /* Enable signal on file descriptor */ + if (fcntl(fd, F_SETSIG, signo) < 0) { + TAP_LOG(WARNING, "Unable to set signo %d for fd %d: %s", + signo, fd, strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETFL, flags | O_ASYNC) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl flags: %s", + strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETOWN, getpid()) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl owner: %s", + strerror(errno)); + goto error; + } + } return fd; error: @@ -339,6 +374,23 @@ tap_rx_offload_get_queue_capa(void) DEV_RX_OFFLOAD_TCP_CKSUM; } +static void +tap_rxq_pool_free(struct rte_mbuf *pool) +{ + struct rte_mbuf *mbuf = pool; + uint16_t nb_segs = 1; + + if (mbuf == NULL) + return; + + while (mbuf->next) { + mbuf = mbuf->next; + nb_segs++; + } + pool->nb_segs = nb_segs; + rte_pktmbuf_free(pool); +} + /* Callback to handle the rx burst of packets to the correct interface and * file descriptor(s) in a multi-queue setup. */ @@ -353,10 +405,8 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) if (trigger == rxq->trigger_seen) return 0; - if (trigger) - rxq->trigger_seen = trigger; + process_private = rte_eth_devices[rxq->in_port].process_private; - rte_compiler_barrier(); for (num_rx = 0; num_rx < nb_pkts; ) { struct rte_mbuf *mbuf = rxq->pool; struct rte_mbuf *seg = NULL; @@ -391,7 +441,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) goto end; seg->next = NULL; - rte_pktmbuf_free(mbuf); + tap_rxq_pool_free(mbuf); goto end; } @@ -433,6 +483,9 @@ end: rxq->stats.ipackets += num_rx; rxq->stats.ibytes += num_rx_bytes; + if (trigger && num_rx < nb_pkts) + rxq->trigger_seen = trigger; + return num_rx; } @@ -520,7 +573,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, } } -static inline void +static inline int tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, struct rte_mbuf **pmbufs, uint16_t *num_packets, unsigned long *num_tx_bytes) @@ -587,7 +640,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, seg_len = rte_pktmbuf_data_len(mbuf); l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; if (seg_len < l234_hlen) - break; + return -1; /* To change checksums, work on a * copy of l2, l3 * headers + l4 pseudo header @@ -633,10 +686,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, /* copy the tx frame data */ n = writev(process_private->txq_fds[txq->queue_id], iovecs, j); if (n <= 0) - break; + return -1; + (*num_packets)++; (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf); } + return 0; } /* Callback to handle sending packets from the tap interface @@ -662,16 +717,14 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) uint16_t num_mbufs = 0; uint16_t tso_segsz = 0; int ret; + int num_tso_mbufs; uint16_t hdrs_len; - int j; uint64_t tso; tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG; if (tso) { struct rte_gso_ctx *gso_ctx = &txq->gso_ctx; - assert(gso_ctx != NULL); - /* TCP segmentation implies TCP checksum offload */ mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM; @@ -685,43 +738,51 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) break; } gso_ctx->gso_size = tso_segsz; - ret = rte_gso_segment(mbuf_in, /* packet to segment */ + /* 'mbuf_in' packet to segment */ + num_tso_mbufs = rte_gso_segment(mbuf_in, gso_ctx, /* gso control block */ (struct rte_mbuf **)&gso_mbufs, /* out mbufs */ RTE_DIM(gso_mbufs)); /* max tso mbufs */ /* ret contains the number of new created mbufs */ - if (ret < 0) + if (num_tso_mbufs < 0) break; mbuf = gso_mbufs; - num_mbufs = ret; + num_mbufs = num_tso_mbufs; } else { /* stats.errs will be incremented */ if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) break; /* ret 0 indicates no new mbufs were created */ - ret = 0; + num_tso_mbufs = 0; mbuf = &mbuf_in; num_mbufs = 1; } - tap_write_mbufs(txq, num_mbufs, mbuf, + ret = tap_write_mbufs(txq, num_mbufs, mbuf, &num_packets, &num_tx_bytes); + if (ret == -1) { + txq->stats.errs++; + /* free tso mbufs */ + if (num_tso_mbufs > 0) + rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); + break; + } num_tx++; /* free original mbuf */ rte_pktmbuf_free(mbuf_in); /* free tso mbufs */ - for (j = 0; j < ret; j++) - rte_pktmbuf_free(mbuf[j]); + if (num_tso_mbufs > 0) + rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); } txq->stats.opackets += num_packets; txq->stats.errs += nb_pkts - num_tx; txq->stats.obytes += num_tx_bytes; - return num_packets; + return num_tx; } static const char * @@ -777,7 +838,7 @@ apply: case SIOCSIFMTU: break; default: - RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n", + TAP_LOG(WARNING, "%s: ioctl() called with wrong arg", pmd->name); return -EINVAL; } @@ -915,7 +976,7 @@ tap_dev_speed_capa(void) return capa; } -static void +static int tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; @@ -939,6 +1000,8 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) * functions together and not in partial combinations */ dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK; + + return 0; } static int @@ -984,7 +1047,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats) return 0; } -static void +static int tap_stats_reset(struct rte_eth_dev *dev) { int i; @@ -1000,6 +1063,8 @@ tap_stats_reset(struct rte_eth_dev *dev) pmd->txq[i].stats.errs = 0; pmd->txq[i].stats.obytes = 0; } + + return 0; } static void @@ -1008,15 +1073,25 @@ tap_dev_close(struct rte_eth_dev *dev) int i; struct pmd_internals *internals = dev->data->dev_private; struct pmd_process_private *process_private = dev->process_private; + struct rx_queue *rxq; tap_link_set_down(dev); - tap_flow_flush(dev, NULL); - tap_flow_implicit_flush(internals, NULL); + if (internals->nlsk_fd != -1) { + tap_flow_flush(dev, NULL); + tap_flow_implicit_flush(internals, NULL); + tap_nl_final(internals->nlsk_fd); + internals->nlsk_fd = -1; + } for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { if (process_private->rxq_fds[i] != -1) { + rxq = &internals->rxq[i]; close(process_private->rxq_fds[i]); process_private->rxq_fds[i] = -1; + tap_rxq_pool_free(rxq->pool); + rte_free(rxq->iovecs); + rxq->pool = NULL; + rxq->iovecs = NULL; } if (process_private->txq_fds[i] != -1) { close(process_private->txq_fds[i]); @@ -1049,10 +1124,10 @@ tap_rx_queue_release(void *queue) if (!rxq) return; process_private = rte_eth_devices[rxq->in_port].process_private; - if (process_private->rxq_fds[rxq->queue_id] > 0) { + if (process_private->rxq_fds[rxq->queue_id] != -1) { close(process_private->rxq_fds[rxq->queue_id]); process_private->rxq_fds[rxq->queue_id] = -1; - rte_pktmbuf_free(rxq->pool); + tap_rxq_pool_free(rxq->pool); rte_free(rxq->iovecs); rxq->pool = NULL; rxq->iovecs = NULL; @@ -1069,7 +1144,7 @@ tap_tx_queue_release(void *queue) return; process_private = rte_eth_devices[txq->out_port].process_private; - if (process_private->txq_fds[txq->queue_id] > 0) { + if (process_private->txq_fds[txq->queue_id] != -1) { close(process_private->txq_fds[txq->queue_id]); process_private->txq_fds[txq->queue_id] = -1; } @@ -1098,52 +1173,116 @@ tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) return 0; } -static void +static int tap_promisc_enable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->promiscuous = 1; + ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC); + if (ret != 0) { + /* Rollback promisc flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_promiscuous_enable() rollback + * dev->data->promiscuous in the case of failure. + */ + return ret; + } + } - dev->data->promiscuous = 1; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC); + return 0; } -static void +static int tap_promisc_disable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->promiscuous = 0; + ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC); + if (ret != 0) { + /* Rollback promisc flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_promiscuous_disable() rollback + * dev->data->promiscuous in the case of failure. + */ + return ret; + } + } - dev->data->promiscuous = 0; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC); + return 0; } -static void +static int tap_allmulti_enable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->all_multicast = 1; + ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI); + if (ret != 0) { + /* Rollback allmulti flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_allmulticast_enable() rollback + * dev->data->all_multicast in the case of failure. + */ + return ret; + } + } - dev->data->all_multicast = 1; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI); + return 0; } -static void +static int tap_allmulti_disable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; - dev->data->all_multicast = 0; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI); + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->all_multicast = 0; + ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI); + if (ret != 0) { + /* Rollback allmulti flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_allmulticast_disable() rollback + * dev->data->all_multicast in the case of failure. + */ + return ret; + } + } + + return 0; } static int @@ -1232,7 +1371,9 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) SOCKET_ID_ANY); if (!mp) { struct pmd_internals *pmd = dev->data->dev_private; - RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n", + + TAP_LOG(ERR, + "%s: failed to create mbuf pool for device %s\n", pmd->name, dev->device->name); return -1; } @@ -1396,7 +1537,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev, return 0; error: - rte_pktmbuf_free(rxq->pool); + tap_rxq_pool_free(rxq->pool); rxq->pool = NULL; rte_free(rxq->iovecs); rxq->iovecs = NULL; @@ -1494,13 +1635,12 @@ static int tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) { struct pmd_internals *pmd = dev->data->dev_private; + int ret; /* In any case, disable interrupt if the conf is no longer there. */ if (!dev->data->dev_conf.intr_conf.lsc) { if (pmd->intr_handle.fd != -1) { - tap_nl_final(pmd->intr_handle.fd); - rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + goto clean; } return 0; } @@ -1511,9 +1651,26 @@ tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) return rte_intr_callback_register( &pmd->intr_handle, tap_dev_intr_handler, dev); } + +clean: + do { + ret = rte_intr_callback_unregister(&pmd->intr_handle, + tap_dev_intr_handler, dev); + if (ret >= 0) { + break; + } else if (ret == -EAGAIN) { + rte_delay_ms(100); + } else { + TAP_LOG(ERR, "intr callback unregister failed: %d", + ret); + break; + } + } while (true); + tap_nl_final(pmd->intr_handle.fd); - return rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + pmd->intr_handle.fd = -1; + + return 0; } static int @@ -1522,8 +1679,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set) int err; err = tap_lsc_intr_handle_set(dev, set); - if (err) + if (err < 0) { + if (!set) + tap_rx_intr_vec_set(dev, 0); return err; + } err = tap_rx_intr_vec_set(dev, set); if (err && set) tap_lsc_intr_handle_set(dev, 0); @@ -1715,6 +1875,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, pmd->dev = dev; strlcpy(pmd->name, tap_name, sizeof(pmd->name)); pmd->type = type; + pmd->ka_fd = -1; + pmd->nlsk_fd = -1; pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); if (pmd->ioctl_sock == -1) { @@ -1745,7 +1907,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, dev->intr_handle = &pmd->intr_handle; /* Presetup the fds to -1 as being not valid */ - pmd->ka_fd = -1; for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { process_private->rxq_fds[i] = -1; process_private->txq_fds[i] = -1; @@ -1885,7 +2046,11 @@ error_remote: tap_flow_implicit_flush(pmd, NULL); error_exit: - if (pmd->ioctl_sock > 0) + if (pmd->nlsk_fd != -1) + close(pmd->nlsk_fd); + if (pmd->ka_fd != -1) + close(pmd->ka_fd); + if (pmd->ioctl_sock != -1) close(pmd->ioctl_sock); /* mac_addrs must not be freed alone because part of dev_private */ dev->data->mac_addrs = NULL; @@ -2317,8 +2482,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals; - struct pmd_process_private *process_private; - int i; /* find the ethdev entry */ eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); @@ -2331,28 +2494,12 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return rte_eth_dev_release_port(eth_dev); - internals = eth_dev->data->dev_private; - process_private = eth_dev->process_private; + tap_dev_close(eth_dev); + internals = eth_dev->data->dev_private; TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", tuntap_types[internals->type], rte_socket_id()); - if (internals->nlsk_fd) { - tap_flow_flush(eth_dev, NULL); - tap_flow_implicit_flush(internals, NULL); - tap_nl_final(internals->nlsk_fd); - } - for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { - if (process_private->rxq_fds[i] != -1) { - close(process_private->rxq_fds[i]); - process_private->rxq_fds[i] = -1; - } - if (process_private->txq_fds[i] != -1) { - close(process_private->txq_fds[i]); - process_private->txq_fds[i] = -1; - } - } - close(internals->ioctl_sock); rte_free(eth_dev->process_private); if (tap_devices_count == 1) @@ -2360,10 +2507,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) tap_devices_count--; rte_eth_dev_release_port(eth_dev); - if (internals->ka_fd != -1) { - close(internals->ka_fd); - internals->ka_fd = -1; - } return 0; } @@ -2386,11 +2529,4 @@ RTE_PMD_REGISTER_PARAM_STRING(net_tap, ETH_TAP_IFACE_ARG "= " ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " " ETH_TAP_REMOTE_ARG "="); -int tap_logtype; - -RTE_INIT(tap_init_log) -{ - tap_logtype = rte_log_register("pmd.net.tap"); - if (tap_logtype >= 0) - rte_log_set_level(tap_logtype, RTE_LOG_NOTICE); -} +RTE_LOG_REGISTER(tap_logtype, pmd.net.tap, NOTICE);