X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvhost%2Frte_eth_vhost.c;h=d198fc8a8e9d7cf72c96e475f3a7fc21dce39ce7;hb=56175f74eaa565cdfde931d83be1db9faaa81667;hp=bbf79b2c0e81b37ab3e9fd829c88b7e342b34427;hpb=604052ae5395882601f6cc53eeafd15846d73d37;p=dpdk.git diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c index bbf79b2c0e..d198fc8a8e 100644 --- a/drivers/net/vhost/rte_eth_vhost.c +++ b/drivers/net/vhost/rte_eth_vhost.c @@ -5,10 +5,11 @@ #include #include #include +#include #include -#include -#include +#include +#include #include #include #include @@ -28,7 +29,6 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; #define ETH_VHOST_IFACE_ARG "iface" #define ETH_VHOST_QUEUES_ARG "queues" #define ETH_VHOST_CLIENT_ARG "client" -#define ETH_VHOST_DEQUEUE_ZERO_COPY "dequeue-zero-copy" #define ETH_VHOST_IOMMU_SUPPORT "iommu-support" #define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support" #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso" @@ -40,7 +40,6 @@ static const char *valid_arguments[] = { ETH_VHOST_IFACE_ARG, ETH_VHOST_QUEUES_ARG, ETH_VHOST_CLIENT_ARG, - ETH_VHOST_DEQUEUE_ZERO_COPY, ETH_VHOST_IOMMU_SUPPORT, ETH_VHOST_POSTCOPY_SUPPORT, ETH_VHOST_VIRTIO_NET_F_HOST_TSO, @@ -72,6 +71,9 @@ enum vhost_xstats_pkts { VHOST_BROADCAST_PKT, VHOST_MULTICAST_PKT, VHOST_UNICAST_PKT, + VHOST_PKT, + VHOST_BYTE, + VHOST_MISSED_PKT, VHOST_ERRORS_PKT, VHOST_ERRORS_FRAGMENTED, VHOST_ERRORS_JABBER, @@ -94,8 +96,9 @@ struct vhost_queue { struct rte_mempool *mb_pool; uint16_t port; uint16_t virtqueue_id; - bool intr_en; struct vhost_stats stats; + int intr_enable; + rte_spinlock_t intr_lock; }; struct pmd_internal { @@ -147,11 +150,11 @@ struct vhost_xstats_name_off { /* [rx]_is prepended to the name string here */ static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = { {"good_packets", - offsetof(struct vhost_queue, stats.pkts)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])}, {"total_bytes", - offsetof(struct vhost_queue, stats.bytes)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])}, {"missed_pkts", - offsetof(struct vhost_queue, stats.missed_pkts)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])}, {"broadcast_packets", offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, {"multicast_packets", @@ -187,11 +190,11 @@ static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = { /* [tx]_ is prepended to the name string here */ static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = { {"good_packets", - offsetof(struct vhost_queue, stats.pkts)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])}, {"total_bytes", - offsetof(struct vhost_queue, stats.bytes)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])}, {"missed_pkts", - offsetof(struct vhost_queue, stats.missed_pkts)}, + offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])}, {"broadcast_packets", offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])}, {"multicast_packets", @@ -285,23 +288,6 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, if (n < nxstats) return nxstats; - for (i = 0; i < dev->data->nb_rx_queues; i++) { - vq = dev->data->rx_queues[i]; - if (!vq) - continue; - vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts - - (vq->stats.xstats[VHOST_BROADCAST_PKT] - + vq->stats.xstats[VHOST_MULTICAST_PKT]); - } - for (i = 0; i < dev->data->nb_tx_queues; i++) { - vq = dev->data->tx_queues[i]; - if (!vq) - continue; - vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts - + vq->stats.missed_pkts - - (vq->stats.xstats[VHOST_BROADCAST_PKT] - + vq->stats.xstats[VHOST_MULTICAST_PKT]); - } for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) { xstats[count].value = 0; for (i = 0; i < dev->data->nb_rx_queues; i++) { @@ -332,7 +318,7 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, } static inline void -vhost_count_multicast_broadcast(struct vhost_queue *vq, +vhost_count_xcast_packets(struct vhost_queue *vq, struct rte_mbuf *mbuf) { struct rte_ether_addr *ea = NULL; @@ -344,20 +330,27 @@ vhost_count_multicast_broadcast(struct vhost_queue *vq, pstats->xstats[VHOST_BROADCAST_PKT]++; else pstats->xstats[VHOST_MULTICAST_PKT]++; + } else { + pstats->xstats[VHOST_UNICAST_PKT]++; } } static void -vhost_update_packet_xstats(struct vhost_queue *vq, - struct rte_mbuf **bufs, - uint16_t count) +vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs, + uint16_t count, uint64_t nb_bytes, + uint64_t nb_missed) { uint32_t pkt_len = 0; uint64_t i = 0; uint64_t index; struct vhost_stats *pstats = &vq->stats; + pstats->xstats[VHOST_BYTE] += nb_bytes; + pstats->xstats[VHOST_MISSED_PKT] += nb_missed; + pstats->xstats[VHOST_UNICAST_PKT] += nb_missed; + for (i = 0; i < count ; i++) { + pstats->xstats[VHOST_PKT]++; pkt_len = bufs[i]->pkt_len; if (pkt_len == 64) { pstats->xstats[VHOST_64_PKT]++; @@ -373,7 +366,7 @@ vhost_update_packet_xstats(struct vhost_queue *vq, else if (pkt_len > 1522) pstats->xstats[VHOST_1523_TO_MAX_PKT]++; } - vhost_count_multicast_broadcast(vq, bufs[i]); + vhost_count_xcast_packets(vq, bufs[i]); } } @@ -383,6 +376,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) struct vhost_queue *r = q; uint16_t i, nb_rx = 0; uint16_t nb_receive = nb_bufs; + uint64_t nb_bytes = 0; if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) return 0; @@ -417,10 +411,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) if (r->internal->vlan_strip) rte_vlan_strip(bufs[i]); - r->stats.bytes += bufs[i]->pkt_len; + nb_bytes += bufs[i]->pkt_len; } - vhost_update_packet_xstats(r, bufs, nb_rx); + r->stats.bytes += nb_bytes; + vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0); out: rte_atomic32_set(&r->while_queuing, 0); @@ -434,6 +429,8 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) struct vhost_queue *r = q; uint16_t i, nb_tx = 0; uint16_t nb_send = 0; + uint64_t nb_bytes = 0; + uint64_t nb_missed = 0; if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) return 0; @@ -474,20 +471,23 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) break; } + for (i = 0; likely(i < nb_tx); i++) + nb_bytes += bufs[i]->pkt_len; + + nb_missed = nb_bufs - nb_tx; + r->stats.pkts += nb_tx; + r->stats.bytes += nb_bytes; r->stats.missed_pkts += nb_bufs - nb_tx; - for (i = 0; likely(i < nb_tx); i++) - r->stats.bytes += bufs[i]->pkt_len; - - vhost_update_packet_xstats(r, bufs, nb_tx); + vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed); - /* According to RFC2863 page42 section ifHCOutMulticastPkts and - * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast" - * are increased when packets are not transmitted successfully. + /* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and + * ifHCOutBroadcastPkts counters are increased when packets are not + * transmitted successfully. */ for (i = nb_tx; i < nb_bufs; i++) - vhost_count_multicast_broadcast(r, bufs[i]); + vhost_count_xcast_packets(r, bufs[i]); for (i = 0; likely(i < nb_tx); i++) rte_pktmbuf_free(bufs[i]); @@ -525,12 +525,58 @@ find_internal_resource(char *ifname) return list; } +static int +eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx) +{ + struct rte_intr_handle *handle = eth_dev->intr_handle; + struct rte_epoll_event rev; + int epfd, ret; + + if (!handle) + return 0; + + if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd) + return 0; + + VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n", + rxq_idx); + + if (handle->elist[rxq_idx].fd != -1) + VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n", + handle->elist[rxq_idx].fd); + + /* + * First remove invalid epoll event, and then install + * the new one. May be solved with a proper API in the + * future. + */ + epfd = handle->elist[rxq_idx].epfd; + rev = handle->elist[rxq_idx]; + ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd, + &handle->elist[rxq_idx]); + if (ret) { + VHOST_LOG(ERR, "Delete epoll event failed.\n"); + return ret; + } + + rev.fd = handle->efds[rxq_idx]; + handle->elist[rxq_idx] = rev; + ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, + &handle->elist[rxq_idx]); + if (ret) { + VHOST_LOG(ERR, "Add epoll event failed.\n"); + return ret; + } + + return 0; +} + static int eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) { struct rte_vhost_vring vring; struct vhost_queue *vq; - int ret = 0; + int old_intr_enable, ret = 0; vq = dev->data->rx_queues[qid]; if (!vq) { @@ -538,6 +584,18 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) return -1; } + rte_spinlock_lock(&vq->intr_lock); + old_intr_enable = vq->intr_enable; + vq->intr_enable = 1; + ret = eth_vhost_update_intr(dev, qid); + rte_spinlock_unlock(&vq->intr_lock); + + if (ret < 0) { + VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid); + vq->intr_enable = old_intr_enable; + return ret; + } + ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); if (ret < 0) { VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid); @@ -547,8 +605,6 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1); rte_wmb(); - vq->intr_en = true; - return ret; } @@ -574,7 +630,7 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid) rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0); rte_wmb(); - vq->intr_en = false; + vq->intr_enable = 0; return 0; } @@ -598,7 +654,6 @@ eth_vhost_install_intr(struct rte_eth_dev *dev) { struct rte_vhost_vring vring; struct vhost_queue *vq; - int count = 0; int nb_rxq = dev->data->nb_rx_queues; int i; int ret; @@ -628,6 +683,8 @@ eth_vhost_install_intr(struct rte_eth_dev *dev) VHOST_LOG(INFO, "Prepare intr vec\n"); for (i = 0; i < nb_rxq; i++) { + dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i; + dev->intr_handle->efds[i] = -1; vq = dev->data->rx_queues[i]; if (!vq) { VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i); @@ -646,14 +703,12 @@ eth_vhost_install_intr(struct rte_eth_dev *dev) "rxq-%d's kickfd is invalid, skip!\n", i); continue; } - dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i; dev->intr_handle->efds[i] = vring.kickfd; - count++; VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i); } - dev->intr_handle->nb_efd = count; - dev->intr_handle->max_intr = count + 1; + dev->intr_handle->nb_efd = nb_rxq; + dev->intr_handle->max_intr = nb_rxq + 1; dev->intr_handle->type = RTE_INTR_HANDLE_VDEV; return 0; @@ -775,7 +830,7 @@ new_device(int vid) VHOST_LOG(INFO, "Vhost device %d created\n", vid); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); + rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); return 0; } @@ -832,7 +887,7 @@ destroy_device(int vid) VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid); eth_vhost_uninstall_intr(eth_dev); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); + rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); } static int @@ -840,8 +895,8 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id) { struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf; struct pmd_internal *internal = eth_dev->data->dev_private; - struct rte_vhost_vring vring; struct vhost_queue *vq; + struct rte_vhost_vring vring; int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1; int ret = 0; @@ -853,22 +908,24 @@ vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id) rte_atomic32_read(&internal->dev_attached) && rte_atomic32_read(&internal->started) && dev_conf->intr_conf.rxq) { - vq = eth_dev->data->rx_queues[rx_idx]; ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring); - if (!ret) { - if (vring.kickfd != - eth_dev->intr_handle->efds[rx_idx]) { - VHOST_LOG(INFO, - "kickfd for rxq-%d was changed.\n", - rx_idx); - eth_dev->intr_handle->efds[rx_idx] = - vring.kickfd; - } + if (ret) { + VHOST_LOG(ERR, "Failed to get vring %d information.\n", + vring_id); + return ret; + } + eth_dev->intr_handle->efds[rx_idx] = vring.kickfd; - rte_vhost_enable_guest_notification(vid, vring_id, - vq->intr_en); - rte_wmb(); + vq = eth_dev->data->rx_queues[rx_idx]; + if (!vq) { + VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx); + return -1; } + + rte_spinlock_lock(&vq->intr_lock); + if (vq->intr_enable) + ret = eth_vhost_update_intr(eth_dev, rx_idx); + rte_spinlock_unlock(&vq->intr_lock); } return ret; @@ -909,7 +966,7 @@ vring_state_changed(int vid, uint16_t vring, int enable) VHOST_LOG(INFO, "vring%u is %s\n", vring, enable ? "enabled" : "disabled"); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL); + rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL); return 0; } @@ -1096,27 +1153,33 @@ eth_dev_start(struct rte_eth_dev *eth_dev) return 0; } -static void +static int eth_dev_stop(struct rte_eth_dev *dev) { struct pmd_internal *internal = dev->data->dev_private; + dev->data->dev_started = 0; rte_atomic32_set(&internal->started, 0); update_queuing_status(dev); + + return 0; } -static void +static int eth_dev_close(struct rte_eth_dev *dev) { struct pmd_internal *internal; struct internal_list *list; - unsigned int i; + unsigned int i, ret; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; internal = dev->data->dev_private; if (!internal) - return; + return 0; - eth_dev_stop(dev); + ret = eth_dev_stop(dev); list = find_internal_resource(internal->iface_name); if (list) { @@ -1142,6 +1205,8 @@ eth_dev_close(struct rte_eth_dev *dev) rte_free(vring_states[dev->data->port_id]); vring_states[dev->data->port_id] = NULL; + + return ret; } static int @@ -1162,6 +1227,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, vq->mb_pool = mb_pool; vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ; + rte_spinlock_init(&vq->intr_lock); dev->data->rx_queues[rx_queue_id] = vq; return 0; @@ -1183,6 +1249,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, } vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ; + rte_spinlock_init(&vq->intr_lock); dev->data->tx_queues[tx_queue_id] = vq; return 0; @@ -1324,7 +1391,6 @@ static const struct eth_dev_ops ops = { .rx_queue_release = eth_queue_release, .tx_queue_release = eth_queue_release, .tx_done_cleanup = eth_tx_done_cleanup, - .rx_queue_count = eth_rx_queue_count, .link_update = eth_link_update, .stats_get = eth_stats_get, .stats_reset = eth_stats_reset, @@ -1381,11 +1447,13 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, internal->flags = flags; internal->disable_flags = disable_flags; data->dev_link = pmd_link; - data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE; + data->dev_flags = RTE_ETH_DEV_INTR_LSC | + RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; data->promiscuous = 1; data->all_multicast = 1; eth_dev->dev_ops = &ops; + eth_dev->rx_queue_count = eth_rx_queue_count; /* finally assign rx and tx ops */ eth_dev->rx_pkt_burst = eth_vhost_rx; @@ -1440,7 +1508,6 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) uint64_t flags = 0; uint64_t disable_flags = 0; int client_mode = 0; - int dequeue_zero_copy = 0; int iommu_support = 0; int postcopy_support = 0; int tso = 0; @@ -1500,16 +1567,6 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) flags |= RTE_VHOST_USER_CLIENT; } - if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) { - ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY, - &open_int, &dequeue_zero_copy); - if (ret < 0) - goto out_free; - - if (dequeue_zero_copy) - flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - } - if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) { ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT, &open_int, &iommu_support); @@ -1592,11 +1649,7 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev) if (eth_dev == NULL) return 0; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return rte_eth_dev_release_port(eth_dev); - eth_dev_close(eth_dev); - rte_eth_dev_release_port(eth_dev); return 0; @@ -1613,7 +1666,6 @@ RTE_PMD_REGISTER_PARAM_STRING(net_vhost, "iface= " "queues= " "client=<0|1> " - "dequeue-zero-copy=<0|1> " "iommu-support=<0|1> " "postcopy-support=<0|1> " "tso=<0|1> "