X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ftap%2Frte_eth_tap.c;h=81c688471d3f5d0777dbb2b404490db8fd6e6a61;hb=f30e69b41f94;hp=e7817e89614cf40ad6ac029348a2faee9542f37c;hpb=662dbc322d0467e0dcdcbbb2b2283a43a5b849e7;p=dpdk.git diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c index e7817e8961..81c688471d 100644 --- a/drivers/net/tap/rte_eth_tap.c +++ b/drivers/net/tap/rte_eth_tap.c @@ -18,8 +18,8 @@ #include #include #include +#include -#include #include #include #include @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -67,9 +68,13 @@ /* IPC key for queue fds sync */ #define TAP_MP_KEY "tap_mp_sync_queues" +#define TAP_IOV_DEFAULT_MAX 1024 + static int tap_devices_count; -static struct rte_vdev_driver pmd_tap_drv; -static struct rte_vdev_driver pmd_tun_drv; + +static const char *tuntap_types[ETH_TUNTAP_TYPE_MAX] = { + "UNKNOWN", "TUN", "TAP" +}; static const char *valid_arguments[] = { ETH_TAP_IFACE_ARG, @@ -78,11 +83,6 @@ static const char *valid_arguments[] = { NULL }; -static unsigned int tap_unit; -static unsigned int tun_unit; - -static char tuntap_name[8]; - static volatile uint32_t tap_trigger; /* Rx trigger */ static struct rte_eth_link pmd_link = { @@ -138,7 +138,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) #ifdef IFF_MULTI_QUEUE unsigned int features; #endif - int fd; + int fd, signo, flags; memset(&ifr, 0, sizeof(struct ifreq)); @@ -148,24 +148,21 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) */ ifr.ifr_flags = (pmd->type == ETH_TUNTAP_TYPE_TAP) ? IFF_TAP : IFF_TUN | IFF_POINTOPOINT; - snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name); - - TAP_LOG(DEBUG, "ifr_name '%s'", ifr.ifr_name); + strlcpy(ifr.ifr_name, pmd->name, IFNAMSIZ); fd = open(TUN_TAP_DEV_PATH, O_RDWR); if (fd < 0) { - TAP_LOG(ERR, "Unable to create %s interface", tuntap_name); + TAP_LOG(ERR, "Unable to open %s interface", TUN_TAP_DEV_PATH); goto error; } #ifdef IFF_MULTI_QUEUE /* Grab the TUN features to verify we can work multi-queue */ if (ioctl(fd, TUNGETFEATURES, &features) < 0) { - TAP_LOG(ERR, "%s unable to get TUN/TAP features", - tuntap_name); + TAP_LOG(ERR, "unable to get TUN/TAP features"); goto error; } - TAP_LOG(DEBUG, "%s Features %08x", tuntap_name, features); + TAP_LOG(DEBUG, "%s Features %08x", TUN_TAP_DEV_PATH, features); if (features & IFF_MULTI_QUEUE) { TAP_LOG(DEBUG, " Multi-queue support for %d queues", @@ -185,6 +182,13 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) goto error; } + /* + * Name passed to kernel might be wildcard like dtun%d + * and need to find the resulting device. + */ + TAP_LOG(DEBUG, "Device name is '%s'", ifr.ifr_name); + strlcpy(pmd->name, ifr.ifr_name, RTE_ETH_NAME_MAX_LEN); + if (is_keepalive) { /* * Detach the TUN/TAP keep-alive queue @@ -199,56 +203,91 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) } } + flags = fcntl(fd, F_GETFL); + if (flags == -1) { + TAP_LOG(WARNING, + "Unable to get %s current flags\n", + ifr.ifr_name); + goto error; + } + /* Always set the file descriptor to non-blocking */ - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { + flags |= O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { TAP_LOG(WARNING, "Unable to set %s to nonblocking: %s", ifr.ifr_name, strerror(errno)); goto error; } - /* Set up trigger to optimize empty Rx bursts */ - errno = 0; - do { + /* Find a free realtime signal */ + for (signo = SIGRTMIN + 1; signo < SIGRTMAX; signo++) { struct sigaction sa; - int flags = fcntl(fd, F_GETFL); - if (flags == -1 || sigaction(SIGIO, NULL, &sa) == -1) + if (sigaction(signo, NULL, &sa) == -1) { + TAP_LOG(WARNING, + "Unable to get current rt-signal %d handler", + signo); + goto error; + } + + /* Already have the handler we want on this signal */ + if (sa.sa_handler == tap_trigger_cb) break; - if (sa.sa_handler != tap_trigger_cb) { - /* - * Make sure SIGIO is not already taken. This is done - * as late as possible to leave the application a - * chance to set up its own signal handler first. - */ - if (sa.sa_handler != SIG_IGN && - sa.sa_handler != SIG_DFL) { - errno = EBUSY; - break; - } - sa = (struct sigaction){ - .sa_flags = SA_RESTART, - .sa_handler = tap_trigger_cb, - }; - if (sigaction(SIGIO, &sa, NULL) == -1) - break; + + /* Is handler in use by application */ + if (sa.sa_handler != SIG_DFL) { + TAP_LOG(DEBUG, + "Skipping used rt-signal %d", signo); + continue; + } + + sa = (struct sigaction) { + .sa_flags = SA_RESTART, + .sa_handler = tap_trigger_cb, + }; + + if (sigaction(signo, &sa, NULL) == -1) { + TAP_LOG(WARNING, + "Unable to set rt-signal %d handler\n", signo); + goto error; } - /* Enable SIGIO on file descriptor */ - fcntl(fd, F_SETFL, flags | O_ASYNC); - fcntl(fd, F_SETOWN, getpid()); - } while (0); - if (errno) { + /* Found a good signal to use */ + TAP_LOG(DEBUG, + "Using rt-signal %d", signo); + break; + } + + if (signo == SIGRTMAX) { + TAP_LOG(WARNING, "All rt-signals are in use\n"); + /* Disable trigger globally in case of error */ tap_trigger = 0; - TAP_LOG(WARNING, "Rx trigger disabled: %s", - strerror(errno)); - } + TAP_LOG(NOTICE, "No Rx trigger signal available\n"); + } else { + /* Enable signal on file descriptor */ + if (fcntl(fd, F_SETSIG, signo) < 0) { + TAP_LOG(WARNING, "Unable to set signo %d for fd %d: %s", + signo, fd, strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETFL, flags | O_ASYNC) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl flags: %s", + strerror(errno)); + goto error; + } + if (fcntl(fd, F_SETOWN, getpid()) < 0) { + TAP_LOG(WARNING, "Unable to set fcntl owner: %s", + strerror(errno)); + goto error; + } + } return fd; error: - if (fd > 0) + if (fd >= 0) close(fd); return -1; } @@ -259,7 +298,7 @@ tap_verify_csum(struct rte_mbuf *mbuf) uint32_t l2 = mbuf->packet_type & RTE_PTYPE_L2_MASK; uint32_t l3 = mbuf->packet_type & RTE_PTYPE_L3_MASK; uint32_t l4 = mbuf->packet_type & RTE_PTYPE_L4_MASK; - unsigned int l2_len = sizeof(struct ether_hdr); + unsigned int l2_len = sizeof(struct rte_ether_hdr); unsigned int l3_len; uint16_t cksum = 0; void *l3_hdr; @@ -270,24 +309,37 @@ tap_verify_csum(struct rte_mbuf *mbuf) else if (l2 == RTE_PTYPE_L2_ETHER_QINQ) l2_len += 8; /* Don't verify checksum for packets with discontinuous L2 header */ - if (unlikely(l2_len + sizeof(struct ipv4_hdr) > + if (unlikely(l2_len + sizeof(struct rte_ipv4_hdr) > rte_pktmbuf_data_len(mbuf))) return; l3_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len); if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) { - struct ipv4_hdr *iph = l3_hdr; + struct rte_ipv4_hdr *iph = l3_hdr; - /* ihl contains the number of 4-byte words in the header */ - l3_len = 4 * (iph->version_ihl & 0xf); + l3_len = rte_ipv4_hdr_len(iph); if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf))) return; + /* check that the total length reported by header is not + * greater than the total received size + */ + if (l2_len + rte_be_to_cpu_16(iph->total_length) > + rte_pktmbuf_data_len(mbuf)) + return; cksum = ~rte_raw_cksum(iph, l3_len); mbuf->ol_flags |= cksum ? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD; } else if (l3 == RTE_PTYPE_L3_IPV6) { - l3_len = sizeof(struct ipv6_hdr); + struct rte_ipv6_hdr *iph = l3_hdr; + + l3_len = sizeof(struct rte_ipv6_hdr); + /* check that the total length reported by header is not + * greater than the total received size + */ + if (l2_len + l3_len + rte_be_to_cpu_16(iph->payload_len) > + rte_pktmbuf_data_len(mbuf)) + return; } else { /* IPv6 extensions are not supported */ return; @@ -325,6 +377,23 @@ tap_rx_offload_get_queue_capa(void) DEV_RX_OFFLOAD_TCP_CKSUM; } +static void +tap_rxq_pool_free(struct rte_mbuf *pool) +{ + struct rte_mbuf *mbuf = pool; + uint16_t nb_segs = 1; + + if (mbuf == NULL) + return; + + while (mbuf->next) { + mbuf = mbuf->next; + nb_segs++; + } + pool->nb_segs = nb_segs; + rte_pktmbuf_free(pool); +} + /* Callback to handle the rx burst of packets to the correct interface and * file descriptor(s) in a multi-queue setup. */ @@ -339,10 +408,8 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) if (trigger == rxq->trigger_seen) return 0; - if (trigger) - rxq->trigger_seen = trigger; + process_private = rte_eth_devices[rxq->in_port].process_private; - rte_compiler_barrier(); for (num_rx = 0; num_rx < nb_pkts; ) { struct rte_mbuf *mbuf = rxq->pool; struct rte_mbuf *seg = NULL; @@ -377,7 +444,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) goto end; seg->next = NULL; - rte_pktmbuf_free(mbuf); + tap_rxq_pool_free(mbuf); goto end; } @@ -419,6 +486,9 @@ end: rxq->stats.ipackets += num_rx; rxq->stats.ibytes += num_rx_bytes; + if (trigger && num_rx < nb_pkts) + rxq->trigger_seen = trigger; + return num_rx; } @@ -480,7 +550,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, void *l3_hdr = packet + l2_len; if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4)) { - struct ipv4_hdr *iph = l3_hdr; + struct rte_ipv4_hdr *iph = l3_hdr; uint16_t cksum; iph->hdr_checksum = 0; @@ -492,9 +562,9 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, l4_hdr = packet + l2_len + l3_len; if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) - *l4_cksum = &((struct udp_hdr *)l4_hdr)->dgram_cksum; + *l4_cksum = &((struct rte_udp_hdr *)l4_hdr)->dgram_cksum; else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) - *l4_cksum = &((struct tcp_hdr *)l4_hdr)->cksum; + *l4_cksum = &((struct rte_tcp_hdr *)l4_hdr)->cksum; else return; **l4_cksum = 0; @@ -506,7 +576,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, } } -static inline void +static inline int tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, struct rte_mbuf **pmbufs, uint16_t *num_packets, unsigned long *num_tx_bytes) @@ -549,9 +619,9 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, char *buff_data = rte_pktmbuf_mtod(seg, void *); proto = (*buff_data & 0xf0); pi.proto = (proto == 0x40) ? - rte_cpu_to_be_16(ETHER_TYPE_IPv4) : + rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) : ((proto == 0x60) ? - rte_cpu_to_be_16(ETHER_TYPE_IPv6) : + rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) : 0x00); } @@ -573,7 +643,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, seg_len = rte_pktmbuf_data_len(mbuf); l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; if (seg_len < l234_hlen) - break; + return -1; /* To change checksums, work on a * copy of l2, l3 * headers + l4 pseudo header @@ -619,10 +689,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, /* copy the tx frame data */ n = writev(process_private->txq_fds[txq->queue_id], iovecs, j); if (n <= 0) - break; + return -1; + (*num_packets)++; (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf); } + return 0; } /* Callback to handle sending packets from the tap interface @@ -641,27 +713,25 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) return 0; struct rte_mbuf *gso_mbufs[MAX_GSO_MBUFS]; - max_size = *txq->mtu + (ETHER_HDR_LEN + ETHER_CRC_LEN + 4); + max_size = *txq->mtu + (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 4); for (i = 0; i < nb_pkts; i++) { struct rte_mbuf *mbuf_in = bufs[num_tx]; struct rte_mbuf **mbuf; uint16_t num_mbufs = 0; uint16_t tso_segsz = 0; int ret; + int num_tso_mbufs; uint16_t hdrs_len; - int j; uint64_t tso; tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG; if (tso) { struct rte_gso_ctx *gso_ctx = &txq->gso_ctx; - assert(gso_ctx != NULL); - /* TCP segmentation implies TCP checksum offload */ mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM; - /* gso size is calculated without ETHER_CRC_LEN */ + /* gso size is calculated without RTE_ETHER_CRC_LEN */ hdrs_len = mbuf_in->l2_len + mbuf_in->l3_len + mbuf_in->l4_len; tso_segsz = mbuf_in->tso_segsz + hdrs_len; @@ -671,43 +741,51 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) break; } gso_ctx->gso_size = tso_segsz; - ret = rte_gso_segment(mbuf_in, /* packet to segment */ + /* 'mbuf_in' packet to segment */ + num_tso_mbufs = rte_gso_segment(mbuf_in, gso_ctx, /* gso control block */ (struct rte_mbuf **)&gso_mbufs, /* out mbufs */ RTE_DIM(gso_mbufs)); /* max tso mbufs */ /* ret contains the number of new created mbufs */ - if (ret < 0) + if (num_tso_mbufs < 0) break; mbuf = gso_mbufs; - num_mbufs = ret; + num_mbufs = num_tso_mbufs; } else { /* stats.errs will be incremented */ if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) break; /* ret 0 indicates no new mbufs were created */ - ret = 0; + num_tso_mbufs = 0; mbuf = &mbuf_in; num_mbufs = 1; } - tap_write_mbufs(txq, num_mbufs, mbuf, + ret = tap_write_mbufs(txq, num_mbufs, mbuf, &num_packets, &num_tx_bytes); + if (ret == -1) { + txq->stats.errs++; + /* free tso mbufs */ + if (num_tso_mbufs > 0) + rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); + break; + } num_tx++; /* free original mbuf */ rte_pktmbuf_free(mbuf_in); /* free tso mbufs */ - for (j = 0; j < ret; j++) - rte_pktmbuf_free(mbuf[j]); + if (num_tso_mbufs > 0) + rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); } txq->stats.opackets += num_packets; txq->stats.errs += nb_pkts - num_tx; txq->stats.obytes += num_tx_bytes; - return num_packets; + return num_tx; } static const char * @@ -744,9 +822,9 @@ tap_ioctl(struct pmd_internals *pmd, unsigned long request, */ apply: if (remote) - snprintf(ifr->ifr_name, IFNAMSIZ, "%s", pmd->remote_iface); + strlcpy(ifr->ifr_name, pmd->remote_iface, IFNAMSIZ); else if (mode == LOCAL_ONLY || mode == LOCAL_AND_REMOTE) - snprintf(ifr->ifr_name, IFNAMSIZ, "%s", pmd->name); + strlcpy(ifr->ifr_name, pmd->name, IFNAMSIZ); switch (request) { case SIOCSIFFLAGS: /* fetch current flags to leave other flags untouched */ @@ -763,7 +841,7 @@ apply: case SIOCSIFMTU: break; default: - RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n", + TAP_LOG(WARNING, "%s: ioctl() called with wrong arg", pmd->name); return -EINVAL; } @@ -822,7 +900,7 @@ tap_dev_start(struct rte_eth_dev *dev) /* This function gets called when the current port gets stopped. */ -static void +static int tap_dev_stop(struct rte_eth_dev *dev) { int i; @@ -834,11 +912,15 @@ tap_dev_stop(struct rte_eth_dev *dev) tap_intr_handle_set(dev, 0); tap_link_set_down(dev); + + return 0; } static int tap_dev_configure(struct rte_eth_dev *dev) { + struct pmd_internals *pmd = dev->data->dev_private; + if (dev->data->nb_rx_queues > RTE_PMD_TAP_MAX_QUEUES) { TAP_LOG(ERR, "%s: number of rx queues %d exceeds max num of queues %d", @@ -856,11 +938,11 @@ tap_dev_configure(struct rte_eth_dev *dev) return -1; } - TAP_LOG(INFO, "%s: %p: TX configured queues number: %u", - dev->device->name, (void *)dev, dev->data->nb_tx_queues); + TAP_LOG(INFO, "%s: %s: TX configured queues number: %u", + dev->device->name, pmd->name, dev->data->nb_tx_queues); - TAP_LOG(INFO, "%s: %p: RX configured queues number: %u", - dev->device->name, (void *)dev, dev->data->nb_rx_queues); + TAP_LOG(INFO, "%s: %s: RX configured queues number: %u", + dev->device->name, pmd->name, dev->data->nb_rx_queues); return 0; } @@ -899,14 +981,14 @@ tap_dev_speed_capa(void) return capa; } -static void +static int tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; dev_info->if_index = internals->if_index; dev_info->max_mac_addrs = 1; - dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN; + dev_info->max_rx_pktlen = (uint32_t)RTE_ETHER_MAX_VLAN_FRAME_LEN; dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES; dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES; dev_info->min_rx_bufsize = 0; @@ -923,6 +1005,8 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) * functions together and not in partial combinations */ dev_info->flow_type_rss_offloads = ~TAP_RSS_HF_MASK; + + return 0; } static int @@ -952,10 +1036,9 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats) for (i = 0; i < imax; i++) { tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets; - tap_stats->q_errors[i] = pmd->txq[i].stats.errs; tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes; tx_total += tap_stats->q_opackets[i]; - tx_err_total += tap_stats->q_errors[i]; + tx_err_total += pmd->txq[i].stats.errs; tx_bytes_total += tap_stats->q_obytes[i]; } @@ -969,7 +1052,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats) return 0; } -static void +static int tap_stats_reset(struct rte_eth_dev *dev) { int i; @@ -985,23 +1068,40 @@ tap_stats_reset(struct rte_eth_dev *dev) pmd->txq[i].stats.errs = 0; pmd->txq[i].stats.obytes = 0; } + + return 0; } -static void +static int tap_dev_close(struct rte_eth_dev *dev) { int i; struct pmd_internals *internals = dev->data->dev_private; struct pmd_process_private *process_private = dev->process_private; + struct rx_queue *rxq; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(dev->process_private); + return 0; + } tap_link_set_down(dev); - tap_flow_flush(dev, NULL); - tap_flow_implicit_flush(internals, NULL); + if (internals->nlsk_fd != -1) { + tap_flow_flush(dev, NULL); + tap_flow_implicit_flush(internals, NULL); + tap_nl_final(internals->nlsk_fd); + internals->nlsk_fd = -1; + } for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { if (process_private->rxq_fds[i] != -1) { + rxq = &internals->rxq[i]; close(process_private->rxq_fds[i]); process_private->rxq_fds[i] = -1; + tap_rxq_pool_free(rxq->pool); + rte_free(rxq->iovecs); + rxq->pool = NULL; + rxq->iovecs = NULL; } if (process_private->txq_fds[i] != -1) { close(process_private->txq_fds[i]); @@ -1015,14 +1115,35 @@ tap_dev_close(struct rte_eth_dev *dev) &internals->remote_initial_flags); } + rte_mempool_free(internals->gso_ctx_mp); + internals->gso_ctx_mp = NULL; + if (internals->ka_fd != -1) { close(internals->ka_fd); internals->ka_fd = -1; } + + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; + + internals = dev->data->dev_private; + TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", + tuntap_types[internals->type], rte_socket_id()); + + if (internals->ioctl_sock != -1) { + close(internals->ioctl_sock); + internals->ioctl_sock = -1; + } + rte_free(dev->process_private); + if (tap_devices_count == 1) + rte_mp_action_unregister(TAP_MP_KEY); + tap_devices_count--; /* * Since TUN device has no more opened file descriptors * it will be removed from kernel */ + + return 0; } static void @@ -1034,10 +1155,10 @@ tap_rx_queue_release(void *queue) if (!rxq) return; process_private = rte_eth_devices[rxq->in_port].process_private; - if (process_private->rxq_fds[rxq->queue_id] > 0) { + if (process_private->rxq_fds[rxq->queue_id] != -1) { close(process_private->rxq_fds[rxq->queue_id]); process_private->rxq_fds[rxq->queue_id] = -1; - rte_pktmbuf_free(rxq->pool); + tap_rxq_pool_free(rxq->pool); rte_free(rxq->iovecs); rxq->pool = NULL; rxq->iovecs = NULL; @@ -1054,7 +1175,7 @@ tap_tx_queue_release(void *queue) return; process_private = rte_eth_devices[txq->out_port].process_private; - if (process_private->txq_fds[txq->queue_id] > 0) { + if (process_private->txq_fds[txq->queue_id] != -1) { close(process_private->txq_fds[txq->queue_id]); process_private->txq_fds[txq->queue_id] = -1; } @@ -1083,56 +1204,120 @@ tap_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) return 0; } -static void +static int tap_promisc_enable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; + int ret; - dev->data->promiscuous = 1; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC); + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->promiscuous = 1; + ret = tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC); + if (ret != 0) { + /* Rollback promisc flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_promiscuous_enable() rollback + * dev->data->promiscuous in the case of failure. + */ + return ret; + } + } + + return 0; } -static void +static int tap_promisc_disable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_PROMISC }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->promiscuous = 0; + ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC); + if (ret != 0) { + /* Rollback promisc flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_promiscuous_disable() rollback + * dev->data->promiscuous in the case of failure. + */ + return ret; + } + } - dev->data->promiscuous = 0; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_destroy(pmd, TAP_REMOTE_PROMISC); + return 0; } -static void +static int tap_allmulti_enable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->all_multicast = 1; + ret = tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI); + if (ret != 0) { + /* Rollback allmulti flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_allmulticast_enable() rollback + * dev->data->all_multicast in the case of failure. + */ + return ret; + } + } - dev->data->all_multicast = 1; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI); + return 0; } -static void +static int tap_allmulti_disable(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct ifreq ifr = { .ifr_flags = IFF_ALLMULTI }; + int ret; + + ret = tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); + if (ret != 0) + return ret; + + if (pmd->remote_if_index && !pmd->flow_isolate) { + dev->data->all_multicast = 0; + ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI); + if (ret != 0) { + /* Rollback allmulti flag */ + tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + /* + * rte_eth_dev_allmulticast_disable() rollback + * dev->data->all_multicast in the case of failure. + */ + return ret; + } + } - dev->data->all_multicast = 0; - tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 0, LOCAL_AND_REMOTE); - if (pmd->remote_if_index && !pmd->flow_isolate) - tap_flow_implicit_destroy(pmd, TAP_REMOTE_ALLMULTI); + return 0; } static int -tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) +tap_mac_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) { struct pmd_internals *pmd = dev->data->dev_private; enum ioctl_mode mode = LOCAL_ONLY; @@ -1145,7 +1330,7 @@ tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) return -ENOTSUP; } - if (is_zero_ether_addr(mac_addr)) { + if (rte_is_zero_ether_addr(mac_addr)) { TAP_LOG(ERR, "%s: can't set an empty MAC address", dev->device->name); return -EINVAL; @@ -1154,22 +1339,24 @@ tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, LOCAL_ONLY); if (ret < 0) return ret; - if (is_same_ether_addr((struct ether_addr *)&ifr.ifr_hwaddr.sa_data, - mac_addr)) + if (rte_is_same_ether_addr( + (struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data, + mac_addr)) return 0; /* Check the current MAC address on the remote */ ret = tap_ioctl(pmd, SIOCGIFHWADDR, &ifr, 0, REMOTE_ONLY); if (ret < 0) return ret; - if (!is_same_ether_addr((struct ether_addr *)&ifr.ifr_hwaddr.sa_data, - mac_addr)) + if (!rte_is_same_ether_addr( + (struct rte_ether_addr *)&ifr.ifr_hwaddr.sa_data, + mac_addr)) mode = LOCAL_AND_REMOTE; ifr.ifr_hwaddr.sa_family = AF_LOCAL; - rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN); + rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, RTE_ETHER_ADDR_LEN); ret = tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 1, mode); if (ret < 0) return ret; - rte_memcpy(&pmd->eth_addr, mac_addr, ETHER_ADDR_LEN); + rte_memcpy(&pmd->eth_addr, mac_addr, RTE_ETHER_ADDR_LEN); if (pmd->remote_if_index && !pmd->flow_isolate) { /* Replace MAC redirection rule after a MAC change */ ret = tap_flow_implicit_destroy(pmd, TAP_REMOTE_LOCAL_MAC); @@ -1196,33 +1383,40 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) { uint32_t gso_types; char pool_name[64]; - - /* - * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE bytes - * size per mbuf use this pool for both direct and indirect mbufs - */ - - struct rte_mempool *mp; /* Mempool for GSO packets */ + struct pmd_internals *pmd = dev->data->dev_private; + int ret; /* initialize GSO context */ gso_types = DEV_TX_OFFLOAD_TCP_TSO; - snprintf(pool_name, sizeof(pool_name), "mp_%s", dev->device->name); - mp = rte_mempool_lookup((const char *)pool_name); - if (!mp) { - mp = rte_pktmbuf_pool_create(pool_name, TAP_GSO_MBUFS_NUM, - TAP_GSO_MBUF_CACHE_SIZE, 0, + if (!pmd->gso_ctx_mp) { + /* + * Create private mbuf pool with TAP_GSO_MBUF_SEG_SIZE + * bytes size per mbuf use this pool for both direct and + * indirect mbufs + */ + ret = snprintf(pool_name, sizeof(pool_name), "mp_%s", + dev->device->name); + if (ret < 0 || ret >= (int)sizeof(pool_name)) { + TAP_LOG(ERR, + "%s: failed to create mbuf pool name for device %s," + "device name too long or output error, ret: %d\n", + pmd->name, dev->device->name, ret); + return -ENAMETOOLONG; + } + pmd->gso_ctx_mp = rte_pktmbuf_pool_create(pool_name, + TAP_GSO_MBUFS_NUM, TAP_GSO_MBUF_CACHE_SIZE, 0, RTE_PKTMBUF_HEADROOM + TAP_GSO_MBUF_SEG_SIZE, SOCKET_ID_ANY); - if (!mp) { - struct pmd_internals *pmd = dev->data->dev_private; - RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n", + if (!pmd->gso_ctx_mp) { + TAP_LOG(ERR, + "%s: failed to create mbuf pool for device %s\n", pmd->name, dev->device->name); return -1; } } - gso_ctx->direct_pool = mp; - gso_ctx->indirect_pool = mp; + gso_ctx->direct_pool = pmd->gso_ctx_mp; + gso_ctx->indirect_pool = pmd->gso_ctx_mp; gso_ctx->gso_types = gso_types; gso_ctx->gso_size = 0; /* gso_size is set in tx_burst() per packet */ gso_ctx->flag = 0; @@ -1310,6 +1504,13 @@ tap_rx_queue_setup(struct rte_eth_dev *dev, struct rx_queue *rxq = &internals->rxq[rx_queue_id]; struct rte_mbuf **tmp = &rxq->pool; long iov_max = sysconf(_SC_IOV_MAX); + + if (iov_max <= 0) { + TAP_LOG(WARNING, + "_SC_IOV_MAX is not defined. Using %d as default", + TAP_IOV_DEFAULT_MAX); + iov_max = TAP_IOV_DEFAULT_MAX; + } uint16_t nb_desc = RTE_MIN(nb_rx_desc, iov_max - 1); struct iovec (*iovecs)[nb_desc + 1]; int data_off = RTE_PKTMBUF_HEADROOM; @@ -1372,7 +1573,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev, return 0; error: - rte_pktmbuf_free(rxq->pool); + tap_rxq_pool_free(rxq->pool); rxq->pool = NULL; rte_free(rxq->iovecs); rxq->iovecs = NULL; @@ -1433,7 +1634,7 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) static int tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused, - struct ether_addr *mc_addr_set __rte_unused, + struct rte_ether_addr *mc_addr_set __rte_unused, uint32_t nb_mc_addr __rte_unused) { /* @@ -1470,13 +1671,12 @@ static int tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) { struct pmd_internals *pmd = dev->data->dev_private; + int ret; /* In any case, disable interrupt if the conf is no longer there. */ if (!dev->data->dev_conf.intr_conf.lsc) { if (pmd->intr_handle.fd != -1) { - tap_nl_final(pmd->intr_handle.fd); - rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + goto clean; } return 0; } @@ -1487,9 +1687,26 @@ tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) return rte_intr_callback_register( &pmd->intr_handle, tap_dev_intr_handler, dev); } + +clean: + do { + ret = rte_intr_callback_unregister(&pmd->intr_handle, + tap_dev_intr_handler, dev); + if (ret >= 0) { + break; + } else if (ret == -EAGAIN) { + rte_delay_ms(100); + } else { + TAP_LOG(ERR, "intr callback unregister failed: %d", + ret); + break; + } + } while (true); + tap_nl_final(pmd->intr_handle.fd); - return rte_intr_callback_unregister(&pmd->intr_handle, - tap_dev_intr_handler, dev); + pmd->intr_handle.fd = -1; + + return 0; } static int @@ -1498,8 +1715,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set) int err; err = tap_lsc_intr_handle_set(dev, set); - if (err) + if (err < 0) { + if (!set) + tap_rx_intr_vec_set(dev, 0); return err; + } err = tap_rx_intr_vec_set(dev, set); if (err && set) tap_lsc_intr_handle_set(dev, 0); @@ -1652,20 +1872,20 @@ static const struct eth_dev_ops ops = { }; static int -eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, - char *remote_iface, struct ether_addr *mac_addr, +eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, + char *remote_iface, struct rte_ether_addr *mac_addr, enum rte_tuntap_type type) { int numa_node = rte_socket_id(); struct rte_eth_dev *dev; struct pmd_internals *pmd; struct pmd_process_private *process_private; + const char *tuntap_name = tuntap_types[type]; struct rte_eth_dev_data *data; struct ifreq ifr; int i; - TAP_LOG(DEBUG, "%s device on numa %u", - tuntap_name, rte_socket_id()); + TAP_LOG(DEBUG, "%s device on numa %u", tuntap_name, rte_socket_id()); dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd)); if (!dev) { @@ -1685,8 +1905,11 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, pmd = dev->data->dev_private; dev->process_private = process_private; pmd->dev = dev; - snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name); + strlcpy(pmd->name, tap_name, sizeof(pmd->name)); pmd->type = type; + pmd->ka_fd = -1; + pmd->nlsk_fd = -1; + pmd->gso_ctx_mp = NULL; pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); if (pmd->ioctl_sock == -1) { @@ -1699,7 +1922,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, /* Setup some default values */ data = dev->data; data->dev_private = pmd; - data->dev_flags = RTE_ETH_DEV_INTR_LSC; + data->dev_flags = RTE_ETH_DEV_INTR_LSC | + RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; data->numa_node = numa_node; data->dev_link = pmd_link; @@ -1717,15 +1941,14 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, dev->intr_handle = &pmd->intr_handle; /* Presetup the fds to -1 as being not valid */ - pmd->ka_fd = -1; for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { process_private->rxq_fds[i] = -1; process_private->txq_fds[i] = -1; } if (pmd->type == ETH_TUNTAP_TYPE_TAP) { - if (is_zero_ether_addr(mac_addr)) - eth_random_addr((uint8_t *)&pmd->eth_addr); + if (rte_is_zero_ether_addr(mac_addr)) + rte_eth_random_addr((uint8_t *)&pmd->eth_addr); else rte_memcpy(&pmd->eth_addr, mac_addr, sizeof(*mac_addr)); } @@ -1741,6 +1964,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, TAP_LOG(ERR, "Unable to create %s interface", tuntap_name); goto error_exit; } + TAP_LOG(DEBUG, "allocated %s", pmd->name); ifr.ifr_mtu = dev->data->mtu; if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0) @@ -1750,7 +1974,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, memset(&ifr, 0, sizeof(struct ifreq)); ifr.ifr_hwaddr.sa_family = AF_LOCAL; rte_memcpy(ifr.ifr_hwaddr.sa_data, &pmd->eth_addr, - ETHER_ADDR_LEN); + RTE_ETHER_ADDR_LEN); if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) goto error_exit; } @@ -1793,8 +2017,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, pmd->name, remote_iface); goto error_remote; } - snprintf(pmd->remote_iface, RTE_ETH_NAME_MAX_LEN, - "%s", remote_iface); + strlcpy(pmd->remote_iface, remote_iface, RTE_ETH_NAME_MAX_LEN); /* Save state of remote device */ tap_ioctl(pmd, SIOCGIFFLAGS, &pmd->remote_initial_flags, 0, REMOTE_ONLY); @@ -1806,7 +2029,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, goto error_remote; } rte_memcpy(&pmd->eth_addr, ifr.ifr_hwaddr.sa_data, - ETHER_ADDR_LEN); + RTE_ETHER_ADDR_LEN); /* The desired MAC is already in ifreq after SIOCGIFHWADDR. */ if (tap_ioctl(pmd, SIOCSIFHWADDR, &ifr, 0, LOCAL_ONLY) < 0) { TAP_LOG(ERR, "%s: failed to get %s MAC address.", @@ -1848,6 +2071,7 @@ disable_rte_flow: TAP_LOG(ERR, "Remote feature requires flow support."); goto error_exit; } + rte_eth_dev_probing_finish(dev); return 0; error_remote: @@ -1856,7 +2080,11 @@ error_remote: tap_flow_implicit_flush(pmd, NULL); error_exit: - if (pmd->ioctl_sock > 0) + if (pmd->nlsk_fd != -1) + close(pmd->nlsk_fd); + if (pmd->ka_fd != -1) + close(pmd->ka_fd); + if (pmd->ioctl_sock != -1) close(pmd->ioctl_sock); /* mac_addrs must not be freed alone because part of dev_private */ dev->data->mac_addrs = NULL; @@ -1869,6 +2097,24 @@ error_exit_nodev: return -EINVAL; } +/* make sure name is a possible Linux network device name */ +static bool +is_valid_iface(const char *name) +{ + if (*name == '\0') + return false; + + if (strnlen(name, IFNAMSIZ) == IFNAMSIZ) + return false; + + while (*name) { + if (*name == '/' || *name == ':' || isspace(*name)) + return false; + name++; + } + return true; +} + static int set_interface_name(const char *key __rte_unused, const char *value, @@ -1876,12 +2122,17 @@ set_interface_name(const char *key __rte_unused, { char *name = (char *)extra_args; - if (value) - strlcpy(name, value, RTE_ETH_NAME_MAX_LEN - 1); - else - snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d", - DEFAULT_TAP_NAME, (tap_unit - 1)); - + if (value) { + if (!is_valid_iface(value)) { + TAP_LOG(ERR, "TAP invalid remote interface name (%s)", + value); + return -1; + } + strlcpy(name, value, RTE_ETH_NAME_MAX_LEN); + } else { + /* use tap%d which causes kernel to choose next available */ + strlcpy(name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN); + } return 0; } @@ -1892,13 +2143,19 @@ set_remote_iface(const char *key __rte_unused, { char *name = (char *)extra_args; - if (value) + if (value) { + if (!is_valid_iface(value)) { + TAP_LOG(ERR, "TAP invalid remote interface name (%s)", + value); + return -1; + } strlcpy(name, value, RTE_ETH_NAME_MAX_LEN); + } return 0; } -static int parse_user_mac(struct ether_addr *user_mac, +static int parse_user_mac(struct rte_ether_addr *user_mac, const char *value) { unsigned int index = 0; @@ -1926,7 +2183,7 @@ set_mac_type(const char *key __rte_unused, const char *value, void *extra_args) { - struct ether_addr *user_mac = extra_args; + struct rte_ether_addr *user_mac = extra_args; if (!value) return 0; @@ -1935,8 +2192,10 @@ set_mac_type(const char *key __rte_unused, static int iface_idx; /* fixed mac = 00:64:74:61:70: */ - memcpy((char *)user_mac->addr_bytes, "\0dtap", ETHER_ADDR_LEN); - user_mac->addr_bytes[ETHER_ADDR_LEN - 1] = iface_idx++ + '0'; + memcpy((char *)user_mac->addr_bytes, "\0dtap", + RTE_ETHER_ADDR_LEN); + user_mac->addr_bytes[RTE_ETHER_ADDR_LEN - 1] = + iface_idx++ + '0'; goto success; } @@ -1968,8 +2227,6 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev) char remote_iface[RTE_ETH_NAME_MAX_LEN]; struct rte_eth_dev *eth_dev; - strcpy(tuntap_name, "TUN"); - name = rte_vdev_device_name(dev); params = rte_vdev_device_args(dev); memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN); @@ -1987,8 +2244,8 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev) return 0; } - snprintf(tun_name, sizeof(tun_name), "%s%u", - DEFAULT_TUN_NAME, tun_unit++); + /* use tun%d which causes kernel to choose next available */ + strlcpy(tun_name, DEFAULT_TUN_NAME "%d", RTE_ETH_NAME_MAX_LEN); if (params && (params[0] != '\0')) { TAP_LOG(DEBUG, "parameters (%s)", params); @@ -2008,17 +2265,15 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev) } pmd_link.link_speed = ETH_SPEED_NUM_10G; - TAP_LOG(NOTICE, "Initializing pmd_tun for %s as %s", - name, tun_name); + TAP_LOG(DEBUG, "Initializing pmd_tun for %s", name); ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0, - ETH_TUNTAP_TYPE_TUN); + ETH_TUNTAP_TYPE_TUN); leave: if (ret == -1) { TAP_LOG(ERR, "Failed to create pmd for %s as %s", name, tun_name); - tun_unit--; /* Restore the unit number */ } rte_kvargs_free(kvlist); @@ -2039,13 +2294,14 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev) int queue, fd_iterator; /* Prepare the request */ + memset(&request, 0, sizeof(request)); strlcpy(request.name, TAP_MP_KEY, sizeof(request.name)); strlcpy(request_param->port_name, port_name, sizeof(request_param->port_name)); request.len_param = sizeof(*request_param); /* Send request and receive reply */ ret = rte_mp_request_sync(&request, &replies, &timeout); - if (ret < 0) { + if (ret < 0 || replies.nb_received != 1) { TAP_LOG(ERR, "Failed to request queues from primary: %d", rte_errno); return -1; @@ -2055,6 +2311,11 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev) TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name); /* Attach the queues from received file descriptors */ + if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) { + TAP_LOG(ERR, "Unexpected number of fds received"); + return -1; + } + dev->data->nb_rx_queues = reply_param->rxq_count; dev->data->nb_tx_queues = reply_param->txq_count; fd_iterator = 0; @@ -2062,7 +2323,7 @@ tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev) process_private->rxq_fds[queue] = reply->fds[fd_iterator++]; for (queue = 0; queue < reply_param->txq_count; queue++) process_private->txq_fds[queue] = reply->fds[fd_iterator++]; - + free(reply); return 0; } @@ -2095,19 +2356,24 @@ tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer) /* Fill file descriptors for all queues */ reply.num_fds = 0; reply_param->rxq_count = 0; + if (dev->data->nb_rx_queues + dev->data->nb_tx_queues > + RTE_MP_MAX_FD_NUM){ + TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds"); + return -1; + } + for (queue = 0; queue < dev->data->nb_rx_queues; queue++) { reply.fds[reply.num_fds++] = process_private->rxq_fds[queue]; reply_param->rxq_count++; } RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues); - RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues); - RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM); reply_param->txq_count = 0; for (queue = 0; queue < dev->data->nb_tx_queues; queue++) { reply.fds[reply.num_fds++] = process_private->txq_fds[queue]; reply_param->txq_count++; } + RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues); /* Send reply */ strlcpy(reply.name, request->name, sizeof(reply.name)); @@ -2132,12 +2398,10 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev) int speed; char tap_name[RTE_ETH_NAME_MAX_LEN]; char remote_iface[RTE_ETH_NAME_MAX_LEN]; - struct ether_addr user_mac = { .addr_bytes = {0} }; + struct rte_ether_addr user_mac = { .addr_bytes = {0} }; struct rte_eth_dev *eth_dev; int tap_devices_count_increased = 0; - strcpy(tuntap_name, "TAP"); - name = rte_vdev_device_name(dev); params = rte_vdev_device_args(dev); @@ -2174,8 +2438,9 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev) } speed = ETH_SPEED_NUM_10G; - snprintf(tap_name, sizeof(tap_name), "%s%u", - DEFAULT_TAP_NAME, tap_unit++); + + /* use tap%d which causes kernel to choose next available */ + strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN); memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN); if (params && (params[0] != '\0')) { @@ -2213,15 +2478,14 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev) } pmd_link.link_speed = speed; - TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s", - name, tap_name); + TAP_LOG(DEBUG, "Initializing pmd_tap for %s", name); /* Register IPC feed callback */ if (!tap_devices_count) { ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues); - if (ret < 0) { - TAP_LOG(ERR, "%s: Failed to register IPC callback: %s", - tuntap_name, strerror(rte_errno)); + if (ret < 0 && rte_errno != ENOTSUP) { + TAP_LOG(ERR, "tap: Failed to register IPC callback: %s", + strerror(rte_errno)); goto leave; } } @@ -2239,7 +2503,6 @@ leave: rte_mp_action_unregister(TAP_MP_KEY); tap_devices_count--; } - tap_unit--; /* Restore the unit number */ } rte_kvargs_free(kvlist); @@ -2252,55 +2515,15 @@ static int rte_pmd_tap_remove(struct rte_vdev_device *dev) { struct rte_eth_dev *eth_dev = NULL; - struct pmd_internals *internals; - struct pmd_process_private *process_private; - int i; /* find the ethdev entry */ eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); if (!eth_dev) - return -ENODEV; - - /* mac_addrs must not be freed alone because part of dev_private */ - eth_dev->data->mac_addrs = NULL; - - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return rte_eth_dev_release_port(eth_dev); - - internals = eth_dev->data->dev_private; - process_private = eth_dev->process_private; - - TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", - (internals->type == ETH_TUNTAP_TYPE_TAP) ? "TAP" : "TUN", - rte_socket_id()); - - if (internals->nlsk_fd) { - tap_flow_flush(eth_dev, NULL); - tap_flow_implicit_flush(internals, NULL); - tap_nl_final(internals->nlsk_fd); - } - for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { - if (process_private->rxq_fds[i] != -1) { - close(process_private->rxq_fds[i]); - process_private->rxq_fds[i] = -1; - } - if (process_private->txq_fds[i] != -1) { - close(process_private->txq_fds[i]); - process_private->txq_fds[i] = -1; - } - } + return 0; - close(internals->ioctl_sock); - rte_free(eth_dev->process_private); - if (tap_devices_count == 1) - rte_mp_action_unregister(TAP_MP_KEY); - tap_devices_count--; + tap_dev_close(eth_dev); rte_eth_dev_release_port(eth_dev); - if (internals->ka_fd != -1) { - close(internals->ka_fd); - internals->ka_fd = -1; - } return 0; } @@ -2323,11 +2546,4 @@ RTE_PMD_REGISTER_PARAM_STRING(net_tap, ETH_TAP_IFACE_ARG "= " ETH_TAP_MAC_ARG "=" ETH_TAP_MAC_ARG_FMT " " ETH_TAP_REMOTE_ARG "="); -int tap_logtype; - -RTE_INIT(tap_init_log) -{ - tap_logtype = rte_log_register("pmd.net.tap"); - if (tap_logtype >= 0) - rte_log_set_level(tap_logtype, RTE_LOG_NOTICE); -} +RTE_LOG_REGISTER(tap_logtype, pmd.net.tap, NOTICE);