X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvhost%2Frte_eth_vhost.c;h=6705e90dba6b19a49f9db14ba96d204549841bec;hb=6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1;hp=c1d09a02308455f3988949d62452a862aed388f8;hpb=50a3345fa9ea6482f197567497b0b8d201173a59;p=dpdk.git diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c index c1d09a0230..6705e90dba 100644 --- a/drivers/net/vhost/rte_eth_vhost.c +++ b/drivers/net/vhost/rte_eth_vhost.c @@ -1,67 +1,49 @@ -/*- - * BSD LICENSE - * - * Copyright (c) 2016 IGEL Co., Ltd. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of IGEL Co.,Ltd. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 IGEL Co., Ltd. + * Copyright(c) 2016-2018 Intel Corporation */ #include #include #include -#ifdef RTE_LIBRTE_VHOST_NUMA -#include -#endif #include -#include +#include +#include #include #include -#include +#include #include -#include +#include #include #include "rte_eth_vhost.h" +static int vhost_logtype; + +#define VHOST_LOG(level, ...) 
\ + rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__) + +enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; + #define ETH_VHOST_IFACE_ARG "iface" #define ETH_VHOST_QUEUES_ARG "queues" #define ETH_VHOST_CLIENT_ARG "client" - -static const char *drivername = "VHOST PMD"; +#define ETH_VHOST_DEQUEUE_ZERO_COPY "dequeue-zero-copy" +#define ETH_VHOST_IOMMU_SUPPORT "iommu-support" +#define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support" +#define VHOST_MAX_PKT_BURST 32 static const char *valid_arguments[] = { ETH_VHOST_IFACE_ARG, ETH_VHOST_QUEUES_ARG, ETH_VHOST_CLIENT_ARG, + ETH_VHOST_DEQUEUE_ZERO_COPY, + ETH_VHOST_IOMMU_SUPPORT, + ETH_VHOST_POSTCOPY_SUPPORT, NULL }; -static struct ether_addr base_eth_addr = { +static struct rte_ether_addr base_eth_addr = { .addr_bytes = { 0x56 /* V */, 0x48 /* H */, @@ -104,18 +86,19 @@ struct vhost_queue { rte_atomic32_t while_queuing; struct pmd_internal *internal; struct rte_mempool *mb_pool; - uint8_t port; + uint16_t port; uint16_t virtqueue_id; struct vhost_stats stats; }; struct pmd_internal { + rte_atomic32_t dev_attached; char *dev_name; char *iface_name; uint16_t max_queues; - uint64_t flags; - - volatile uint16_t once; + int vid; + rte_atomic32_t started; + uint8_t vlan_strip; }; struct internal_list { @@ -129,9 +112,6 @@ static struct internal_list_head internal_list = static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER; -static rte_atomic16_t nb_started_ports; -static pthread_t session_th; - static struct rte_eth_link pmd_link = { .link_speed = 10000, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -322,6 +302,7 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)vq) + vhost_rxport_stat_strings[t].offset); } + xstats[count].id = count; count++; } for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) { @@ -334,6 +315,7 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)vq) + vhost_txport_stat_strings[t].offset); } + xstats[count].id = count; count++; } return count; @@ -343,10 +325,10 @@ static inline void vhost_count_multicast_broadcast(struct vhost_queue *vq, struct rte_mbuf *mbuf) { - struct ether_addr *ea = NULL; + struct rte_ether_addr *ea = NULL; struct vhost_stats *pstats = &vq->stats; - ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); + ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *); if (is_multicast_ether_addr(ea)) { if (is_broadcast_ether_addr(ea)) pstats->xstats[VHOST_BROADCAST_PKT]++; @@ -390,6 +372,7 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) { struct vhost_queue *r = q; uint16_t i, nb_rx = 0; + uint16_t nb_receive = nb_bufs; if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) return 0; @@ -400,13 +383,30 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) goto out; /* Dequeue packets from guest TX queue */ - nb_rx = rte_vhost_dequeue_burst(r->vid, - r->virtqueue_id, r->mb_pool, bufs, nb_bufs); + while (nb_receive) { + uint16_t nb_pkts; + uint16_t num = (uint16_t)RTE_MIN(nb_receive, + VHOST_MAX_PKT_BURST); + + nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id, + r->mb_pool, &bufs[nb_rx], + num); + + nb_rx += nb_pkts; + nb_receive -= nb_pkts; + if (nb_pkts < num) + break; + } r->stats.pkts += nb_rx; for (i = 0; likely(i < nb_rx); i++) { bufs[i]->port = r->port; + bufs[i]->vlan_tci = 0; + + if (r->internal->vlan_strip) + rte_vlan_strip(bufs[i]); + r->stats.bytes += bufs[i]->pkt_len; } @@ -423,6 +423,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) { struct vhost_queue 
*r = q; uint16_t i, nb_tx = 0; + uint16_t nb_send = 0; if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) return 0; @@ -432,9 +433,36 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0)) goto out; + for (i = 0; i < nb_bufs; i++) { + struct rte_mbuf *m = bufs[i]; + + /* Do VLAN tag insertion */ + if (m->ol_flags & PKT_TX_VLAN_PKT) { + int error = rte_vlan_insert(&m); + if (unlikely(error)) { + rte_pktmbuf_free(m); + continue; + } + } + + bufs[nb_send] = m; + ++nb_send; + } + /* Enqueue packets to guest RX queue */ - nb_tx = rte_vhost_enqueue_burst(r->vid, - r->virtqueue_id, bufs, nb_bufs); + while (nb_send) { + uint16_t nb_pkts; + uint16_t num = (uint16_t)RTE_MIN(nb_send, + VHOST_MAX_PKT_BURST); + + nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id, + &bufs[nb_tx], num); + + nb_tx += nb_pkts; + nb_send -= nb_pkts; + if (nb_pkts < num) + break; + } r->stats.pkts += nb_tx; r->stats.missed_pkts += nb_bufs - nb_tx; @@ -462,6 +490,11 @@ out: static int eth_dev_configure(struct rte_eth_dev *dev __rte_unused) { + struct pmd_internal *internal = dev->data->dev_private; + const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + + internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); + return 0; } @@ -493,13 +526,202 @@ find_internal_resource(char *ifname) return list; } +static int +eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid) +{ + struct rte_vhost_vring vring; + struct vhost_queue *vq; + int ret = 0; + + vq = dev->data->rx_queues[qid]; + if (!vq) { + VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); + return -1; + } + + ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); + if (ret < 0) { + VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid); + return ret; + } + VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid); + rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1); + rte_wmb(); + + return ret; +} + +static int +eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid) +{ + struct rte_vhost_vring vring; + struct vhost_queue *vq; + int ret = 0; + + vq = dev->data->rx_queues[qid]; + if (!vq) { + VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid); + return -1; + } + + ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring); + if (ret < 0) { + VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid); + return ret; + } + VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid); + rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0); + rte_wmb(); + + return 0; +} + +static void +eth_vhost_uninstall_intr(struct rte_eth_dev *dev) +{ + struct rte_intr_handle *intr_handle = dev->intr_handle; + + if (intr_handle) { + if (intr_handle->intr_vec) + free(intr_handle->intr_vec); + free(intr_handle); + } + + dev->intr_handle = NULL; +} + +static int +eth_vhost_install_intr(struct rte_eth_dev *dev) +{ + struct rte_vhost_vring vring; + struct vhost_queue *vq; + int count = 0; + int nb_rxq = dev->data->nb_rx_queues; + int i; + int ret; + + /* uninstall firstly if we are reconnecting */ + if (dev->intr_handle) + eth_vhost_uninstall_intr(dev); + + dev->intr_handle = malloc(sizeof(*dev->intr_handle)); + if (!dev->intr_handle) { + VHOST_LOG(ERR, "Fail to allocate intr_handle\n"); + return -ENOMEM; + } + memset(dev->intr_handle, 0, sizeof(*dev->intr_handle)); + + dev->intr_handle->efd_counter_size = sizeof(uint64_t); + + dev->intr_handle->intr_vec = + malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0])); + + if (!dev->intr_handle->intr_vec) { + 
VHOST_LOG(ERR, + "Failed to allocate memory for interrupt vector\n"); + free(dev->intr_handle); + return -ENOMEM; + } + + VHOST_LOG(INFO, "Prepare intr vec\n"); + for (i = 0; i < nb_rxq; i++) { + vq = dev->data->rx_queues[i]; + if (!vq) { + VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i); + continue; + } + + ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring); + if (ret < 0) { + VHOST_LOG(INFO, + "Failed to get rxq-%d's vring, skip!\n", i); + continue; + } + + if (vring.kickfd < 0) { + VHOST_LOG(INFO, + "rxq-%d's kickfd is invalid, skip!\n", i); + continue; + } + dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i; + dev->intr_handle->efds[i] = vring.kickfd; + count++; + VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i); + } + + dev->intr_handle->nb_efd = count; + dev->intr_handle->max_intr = count + 1; + dev->intr_handle->type = RTE_INTR_HANDLE_VDEV; + + return 0; +} + +static void +update_queuing_status(struct rte_eth_dev *dev) +{ + struct pmd_internal *internal = dev->data->dev_private; + struct vhost_queue *vq; + unsigned int i; + int allow_queuing = 1; + + if (!dev->data->rx_queues || !dev->data->tx_queues) + return; + + if (rte_atomic32_read(&internal->started) == 0 || + rte_atomic32_read(&internal->dev_attached) == 0) + allow_queuing = 0; + + /* Wait until rx/tx_pkt_burst stops accessing vhost device */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + vq = dev->data->rx_queues[i]; + if (vq == NULL) + continue; + rte_atomic32_set(&vq->allow_queuing, allow_queuing); + while (rte_atomic32_read(&vq->while_queuing)) + rte_pause(); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + vq = dev->data->tx_queues[i]; + if (vq == NULL) + continue; + rte_atomic32_set(&vq->allow_queuing, allow_queuing); + while (rte_atomic32_read(&vq->while_queuing)) + rte_pause(); + } +} + +static void +queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal) +{ + struct vhost_queue *vq; + int i; + + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + vq = eth_dev->data->rx_queues[i]; + if (!vq) + continue; + vq->vid = internal->vid; + vq->internal = internal; + vq->port = eth_dev->data->port_id; + } + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + vq = eth_dev->data->tx_queues[i]; + if (!vq) + continue; + vq->vid = internal->vid; + vq->internal = internal; + vq->port = eth_dev->data->port_id; + } +} + static int new_device(int vid) { struct rte_eth_dev *eth_dev; struct internal_list *list; struct pmd_internal *internal; - struct vhost_queue *vq; + struct rte_eth_conf *dev_conf; unsigned i; char ifname[PATH_MAX]; #ifdef RTE_LIBRTE_VHOST_NUMA @@ -509,12 +731,13 @@ new_device(int vid) rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); list = find_internal_resource(ifname); if (list == NULL) { - RTE_LOG(INFO, PMD, "Invalid device name: %s\n", ifname); + VHOST_LOG(INFO, "Invalid device name: %s\n", ifname); return -1; } eth_dev = list->eth_dev; internal = eth_dev->data->dev_private; + dev_conf = ð_dev->data->dev_conf; #ifdef RTE_LIBRTE_VHOST_NUMA newnode = rte_vhost_get_numa_node(vid); @@ -522,44 +745,34 @@ new_device(int vid) eth_dev->data->numa_node = newnode; #endif - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - vq = eth_dev->data->rx_queues[i]; - if (vq == NULL) - continue; - vq->vid = vid; - vq->internal = internal; - vq->port = eth_dev->data->port_id; - } - for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { - vq = eth_dev->data->tx_queues[i]; - if (vq == NULL) - continue; - vq->vid = vid; - vq->internal = internal; - vq->port = 
eth_dev->data->port_id; + internal->vid = vid; + if (rte_atomic32_read(&internal->started) == 1) { + queue_setup(eth_dev, internal); + + if (dev_conf->intr_conf.rxq) { + if (eth_vhost_install_intr(eth_dev) < 0) { + VHOST_LOG(INFO, + "Failed to install interrupt handler."); + return -1; + } + } + } else { + VHOST_LOG(INFO, "RX/TX queues not exist yet\n"); } - for (i = 0; i < rte_vhost_get_queue_num(vid) * VIRTIO_QNUM; i++) + for (i = 0; i < rte_vhost_get_vring_num(vid); i++) rte_vhost_enable_guest_notification(vid, i, 0); + rte_vhost_get_mtu(vid, ð_dev->data->mtu); + eth_dev->data->dev_link.link_status = ETH_LINK_UP; - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - vq = eth_dev->data->rx_queues[i]; - if (vq == NULL) - continue; - rte_atomic32_set(&vq->allow_queuing, 1); - } - for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { - vq = eth_dev->data->tx_queues[i]; - if (vq == NULL) - continue; - rte_atomic32_set(&vq->allow_queuing, 1); - } + rte_atomic32_set(&internal->dev_attached, 1); + update_queuing_status(eth_dev); - RTE_LOG(INFO, PMD, "New connection established\n"); + VHOST_LOG(INFO, "Vhost device %d created\n", vid); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); return 0; } @@ -568,6 +781,7 @@ static void destroy_device(int vid) { struct rte_eth_dev *eth_dev; + struct pmd_internal *internal; struct vhost_queue *vq; struct internal_list *list; char ifname[PATH_MAX]; @@ -577,42 +791,30 @@ destroy_device(int vid) rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); list = find_internal_resource(ifname); if (list == NULL) { - RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname); + VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname); return; } eth_dev = list->eth_dev; + internal = eth_dev->data->dev_private; - /* Wait until rx/tx_pkt_burst stops accessing vhost device */ - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - vq = eth_dev->data->rx_queues[i]; - if (vq == NULL) - continue; - rte_atomic32_set(&vq->allow_queuing, 0); - while (rte_atomic32_read(&vq->while_queuing)) - rte_pause(); - } - for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { - vq = eth_dev->data->tx_queues[i]; - if (vq == NULL) - continue; - rte_atomic32_set(&vq->allow_queuing, 0); - while (rte_atomic32_read(&vq->while_queuing)) - rte_pause(); - } + rte_atomic32_set(&internal->dev_attached, 0); + update_queuing_status(eth_dev); eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - vq = eth_dev->data->rx_queues[i]; - if (vq == NULL) - continue; - vq->vid = -1; - } - for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { - vq = eth_dev->data->tx_queues[i]; - if (vq == NULL) - continue; - vq->vid = -1; + if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) { + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + vq = eth_dev->data->rx_queues[i]; + if (!vq) + continue; + vq->vid = -1; + } + for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { + vq = eth_dev->data->tx_queues[i]; + if (!vq) + continue; + vq->vid = -1; + } } state = vring_states[eth_dev->data->port_id]; @@ -624,9 +826,10 @@ destroy_device(int vid) state->max_vring = 0; rte_spinlock_unlock(&state->lock); - RTE_LOG(INFO, PMD, "Connection closed\n"); + VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid); + eth_vhost_uninstall_intr(eth_dev); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); } static int @@ -640,7 
+843,7 @@ vring_state_changed(int vid, uint16_t vring, int enable) rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); list = find_internal_resource(ifname); if (list == NULL) { - RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname); + VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname); return -1; } @@ -652,16 +855,22 @@ vring_state_changed(int vid, uint16_t vring, int enable) state->max_vring = RTE_MAX(vring, state->max_vring); rte_spinlock_unlock(&state->lock); - RTE_LOG(INFO, PMD, "vring%u is %s\n", + VHOST_LOG(INFO, "vring%u is %s\n", vring, enable ? "enabled" : "disabled"); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE); + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL); return 0; } +static struct vhost_device_ops vhost_ops = { + .new_device = new_device, + .destroy_device = destroy_device, + .vring_state_changed = vring_state_changed, +}; + int -rte_eth_vhost_get_queue_event(uint8_t port_id, +rte_eth_vhost_get_queue_event(uint16_t port_id, struct rte_eth_vhost_queue_event *event) { struct rte_vhost_vring_state *state; @@ -669,13 +878,13 @@ rte_eth_vhost_get_queue_event(uint8_t port_id, int idx; if (port_id >= RTE_MAX_ETHPORTS) { - RTE_LOG(ERR, PMD, "Invalid port id\n"); + VHOST_LOG(ERR, "Invalid port id\n"); return -1; } state = vring_states[port_id]; if (!state) { - RTE_LOG(ERR, PMD, "Unused port\n"); + VHOST_LOG(ERR, "Unused port\n"); return -1; } @@ -698,7 +907,7 @@ rte_eth_vhost_get_queue_event(uint8_t port_id, } int -rte_eth_vhost_get_vid_from_port_id(uint8_t port_id) +rte_eth_vhost_get_vid_from_port_id(uint16_t port_id) { struct internal_list *list; struct rte_eth_dev *eth_dev; @@ -726,81 +935,76 @@ rte_eth_vhost_get_vid_from_port_id(uint8_t port_id) return vid; } -static void * -vhost_driver_session(void *param __rte_unused) +static int +eth_dev_start(struct rte_eth_dev *eth_dev) { - static struct virtio_net_device_ops vhost_ops; + struct pmd_internal *internal = eth_dev->data->dev_private; + struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf; + + queue_setup(eth_dev, internal); - /* set vhost arguments */ - vhost_ops.new_device = new_device; - vhost_ops.destroy_device = destroy_device; - vhost_ops.vring_state_changed = vring_state_changed; - if (rte_vhost_driver_callback_register(&vhost_ops) < 0) - RTE_LOG(ERR, PMD, "Can't register callbacks\n"); + if (rte_atomic32_read(&internal->dev_attached) == 1) { + if (dev_conf->intr_conf.rxq) { + if (eth_vhost_install_intr(eth_dev) < 0) { + VHOST_LOG(INFO, + "Failed to install interrupt handler."); + return -1; + } + } + } - /* start event handling */ - rte_vhost_driver_session_start(); + rte_atomic32_set(&internal->started, 1); + update_queuing_status(eth_dev); - return NULL; + return 0; } -static int -vhost_driver_session_start(void) +static void +eth_dev_stop(struct rte_eth_dev *dev) { - int ret; - - ret = pthread_create(&session_th, - NULL, vhost_driver_session, NULL); - if (ret) - RTE_LOG(ERR, PMD, "Can't create a thread\n"); + struct pmd_internal *internal = dev->data->dev_private; - return ret; + rte_atomic32_set(&internal->started, 0); + update_queuing_status(dev); } static void -vhost_driver_session_stop(void) +eth_dev_close(struct rte_eth_dev *dev) { - int ret; + struct pmd_internal *internal; + struct internal_list *list; + unsigned int i; - ret = pthread_cancel(session_th); - if (ret) - RTE_LOG(ERR, PMD, "Can't cancel the thread\n"); + internal = dev->data->dev_private; + if (!internal) + return; - ret = pthread_join(session_th, NULL); - if (ret) - RTE_LOG(ERR, PMD, 
"Can't join the thread\n"); -} + eth_dev_stop(dev); -static int -eth_dev_start(struct rte_eth_dev *dev) -{ - struct pmd_internal *internal = dev->data->dev_private; - int ret = 0; + rte_vhost_driver_unregister(internal->iface_name); - if (rte_atomic16_cmpset(&internal->once, 0, 1)) { - ret = rte_vhost_driver_register(internal->iface_name, - internal->flags); - if (ret) - return ret; - } + list = find_internal_resource(internal->iface_name); + if (!list) + return; - /* We need only one message handling thread */ - if (rte_atomic16_add_return(&nb_started_ports, 1) == 1) - ret = vhost_driver_session_start(); + pthread_mutex_lock(&internal_list_lock); + TAILQ_REMOVE(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + rte_free(list); - return ret; -} + if (dev->data->rx_queues) + for (i = 0; i < dev->data->nb_rx_queues; i++) + rte_free(dev->data->rx_queues[i]); -static void -eth_dev_stop(struct rte_eth_dev *dev) -{ - struct pmd_internal *internal = dev->data->dev_private; + if (dev->data->tx_queues) + for (i = 0; i < dev->data->nb_tx_queues; i++) + rte_free(dev->data->tx_queues[i]); - if (rte_atomic16_cmpset(&internal->once, 1, 0)) - rte_vhost_driver_unregister(internal->iface_name); + free(internal->dev_name); + free(internal->iface_name); + rte_free(internal); - if (rte_atomic16_sub_return(&nb_started_ports, 1) == 0) - vhost_driver_session_stop(); + dev->data->dev_private = NULL; } static int @@ -815,7 +1019,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), RTE_CACHE_LINE_SIZE, socket_id); if (vq == NULL) { - RTE_LOG(ERR, PMD, "Failed to allocate memory for rx queue\n"); + VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n"); return -ENOMEM; } @@ -837,7 +1041,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue), RTE_CACHE_LINE_SIZE, socket_id); if (vq == NULL) { - RTE_LOG(ERR, PMD, "Failed to allocate memory for tx queue\n"); + VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n"); return -ENOMEM; } @@ -855,19 +1059,22 @@ eth_dev_info(struct rte_eth_dev *dev, internal = dev->data->dev_private; if (internal == NULL) { - RTE_LOG(ERR, PMD, "Invalid device specified\n"); + VHOST_LOG(ERR, "Invalid device specified\n"); return; } - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; dev_info->max_rx_queues = internal->max_queues; dev_info->max_tx_queues = internal->max_queues; dev_info->min_rx_bufsize = 0; + + dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT; + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; } -static void +static int eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { unsigned i; @@ -905,6 +1112,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) stats->oerrors = tx_missed_total; stats->ibytes = rx_total_bytes; stats->obytes = tx_total_bytes; + + return 0; } static void @@ -937,91 +1146,87 @@ eth_queue_release(void *q) } static int -eth_link_update(struct rte_eth_dev *dev __rte_unused, - int wait_to_complete __rte_unused) +eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused) { + /* + * vHost does not hang onto mbuf. eth_vhost_tx() copies packet data + * and releases mbuf, so nothing to cleanup. + */ return 0; } -/** - * Disable features in feature_mask. Returns 0 on success. 
- */ -int -rte_eth_vhost_feature_disable(uint64_t feature_mask) +static int +eth_link_update(struct rte_eth_dev *dev __rte_unused, + int wait_to_complete __rte_unused) { - return rte_vhost_feature_disable(feature_mask); + return 0; } -/** - * Enable features in feature_mask. Returns 0 on success. - */ -int -rte_eth_vhost_feature_enable(uint64_t feature_mask) +static uint32_t +eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) { - return rte_vhost_feature_enable(feature_mask); -} + struct vhost_queue *vq; -/* Returns currently supported vhost features */ -uint64_t -rte_eth_vhost_feature_get(void) -{ - return rte_vhost_feature_get(); + vq = dev->data->rx_queues[rx_queue_id]; + if (vq == NULL) + return 0; + + return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id); } static const struct eth_dev_ops ops = { .dev_start = eth_dev_start, .dev_stop = eth_dev_stop, + .dev_close = eth_dev_close, .dev_configure = eth_dev_configure, .dev_infos_get = eth_dev_info, .rx_queue_setup = eth_rx_queue_setup, .tx_queue_setup = eth_tx_queue_setup, .rx_queue_release = eth_queue_release, .tx_queue_release = eth_queue_release, + .tx_done_cleanup = eth_tx_done_cleanup, + .rx_queue_count = eth_rx_queue_count, .link_update = eth_link_update, .stats_get = eth_stats_get, .stats_reset = eth_stats_reset, .xstats_reset = vhost_dev_xstats_reset, .xstats_get = vhost_dev_xstats_get, .xstats_get_names = vhost_dev_xstats_get_names, + .rx_queue_intr_enable = eth_rxq_intr_enable, + .rx_queue_intr_disable = eth_rxq_intr_disable, }; +static struct rte_vdev_driver pmd_vhost_drv; + static int -eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues, - const unsigned numa_node, uint64_t flags) +eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, + int16_t queues, const unsigned int numa_node, uint64_t flags) { - struct rte_eth_dev_data *data = NULL; + const char *name = rte_vdev_device_name(dev); + struct rte_eth_dev_data *data; struct pmd_internal *internal = NULL; struct rte_eth_dev *eth_dev = NULL; - struct ether_addr *eth_addr = NULL; + struct rte_ether_addr *eth_addr = NULL; struct rte_vhost_vring_state *vring_state = NULL; struct internal_list *list = NULL; - RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n", + VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n", numa_node); - /* now do all data allocation - for eth_dev structure, dummy pci driver - * and internal (private) data - */ - data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); - if (data == NULL) - goto error; - - internal = rte_zmalloc_socket(name, sizeof(*internal), 0, numa_node); - if (internal == NULL) - goto error; - list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); if (list == NULL) goto error; /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name); + eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal)); if (eth_dev == NULL) goto error; + data = eth_dev->data; eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node); if (eth_addr == NULL) goto error; + data->mac_addrs = eth_addr; *eth_addr = base_eth_addr; eth_addr->addr_bytes[5] = eth_dev->data->port_id; @@ -1030,21 +1235,18 @@ eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues, if (vring_state == NULL) goto error; - TAILQ_INIT(ð_dev->link_intr_cbs); - /* now put it all together * - store queue data in internal, - * - store numa_node info in ethdev data * - point eth_dev_data to internals * - and point eth_dev structure to new eth_dev_data structure */ + 
internal = eth_dev->data->dev_private; internal->dev_name = strdup(name); if (internal->dev_name == NULL) goto error; internal->iface_name = strdup(iface_name); if (internal->iface_name == NULL) goto error; - internal->flags = flags; list->eth_dev = eth_dev; pthread_mutex_lock(&internal_list_lock); @@ -1054,43 +1256,44 @@ eth_dev_vhost_create(const char *name, char *iface_name, int16_t queues, rte_spinlock_init(&vring_state->lock); vring_states[eth_dev->data->port_id] = vring_state; - data->dev_private = internal; - data->port_id = eth_dev->data->port_id; - memmove(data->name, eth_dev->data->name, sizeof(data->name)); data->nb_rx_queues = queues; data->nb_tx_queues = queues; internal->max_queues = queues; + internal->vid = -1; data->dev_link = pmd_link; - data->mac_addrs = eth_addr; + data->dev_flags = RTE_ETH_DEV_INTR_LSC; - /* We'll replace the 'data' originally allocated by eth_dev. So the - * vhost PMD resources won't be shared between multi processes. - */ - eth_dev->data = data; eth_dev->dev_ops = &ops; - eth_dev->driver = NULL; - data->dev_flags = - RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC; - data->kdrv = RTE_KDRV_NONE; - data->drv_name = internal->dev_name; - data->numa_node = numa_node; /* finally assign rx and tx ops */ eth_dev->rx_pkt_burst = eth_vhost_rx; eth_dev->tx_pkt_burst = eth_vhost_tx; + if (rte_vhost_driver_register(iface_name, flags)) + goto error; + + if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) { + VHOST_LOG(ERR, "Can't register callbacks\n"); + goto error; + } + + if (rte_vhost_driver_start(iface_name) < 0) { + VHOST_LOG(ERR, "Failed to start driver for %s\n", + iface_name); + goto error; + } + + rte_eth_dev_probing_finish(eth_dev); return data->port_id; error: - if (internal) + if (internal) { + free(internal->iface_name); free(internal->dev_name); + } rte_free(vring_state); - rte_free(eth_addr); - if (eth_dev) - rte_eth_dev_release_port(eth_dev); - rte_free(internal); + rte_eth_dev_release_port(eth_dev); rte_free(list); - rte_free(data); return -1; } @@ -1124,7 +1327,7 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args) } static int -rte_pmd_vhost_probe(const char *name, const char *params) +rte_pmd_vhost_probe(struct rte_vdev_device *dev) { struct rte_kvargs *kvlist = NULL; int ret = 0; @@ -1132,10 +1335,28 @@ rte_pmd_vhost_probe(const char *name, const char *params) uint16_t queues; uint64_t flags = 0; int client_mode = 0; + int dequeue_zero_copy = 0; + int iommu_support = 0; + int postcopy_support = 0; + struct rte_eth_dev *eth_dev; + const char *name = rte_vdev_device_name(dev); - RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", name); + VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + VHOST_LOG(ERR, "Failed to probe %s\n", name); + return -1; + } + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &ops; + eth_dev->device = &dev->device; + rte_eth_dev_probing_finish(eth_dev); + return 0; + } - kvlist = rte_kvargs_parse(params, valid_arguments); + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); if (kvlist == NULL) return -1; @@ -1168,7 +1389,41 @@ rte_pmd_vhost_probe(const char *name, const char *params) flags |= RTE_VHOST_USER_CLIENT; } - eth_dev_vhost_create(name, iface_name, queues, rte_socket_id(), flags); + if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) { + ret = rte_kvargs_process(kvlist, 
ETH_VHOST_DEQUEUE_ZERO_COPY, + &open_int, &dequeue_zero_copy); + if (ret < 0) + goto out_free; + + if (dequeue_zero_copy) + flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + } + + if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) { + ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT, + &open_int, &iommu_support); + if (ret < 0) + goto out_free; + + if (iommu_support) + flags |= RTE_VHOST_USER_IOMMU_SUPPORT; + } + + if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) { + ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT, + &open_int, &postcopy_support); + if (ret < 0) + goto out_free; + + if (postcopy_support) + flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT; + } + + if (dev->device.numa_node == SOCKET_ID_ANY) + dev->device.numa_node = rte_socket_id(); + + eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node, + flags); out_free: rte_kvargs_free(kvlist); @@ -1176,50 +1431,27 @@ out_free: } static int -rte_pmd_vhost_remove(const char *name) +rte_pmd_vhost_remove(struct rte_vdev_device *dev) { + const char *name; struct rte_eth_dev *eth_dev = NULL; - struct pmd_internal *internal; - struct internal_list *list; - unsigned int i; - RTE_LOG(INFO, PMD, "Un-Initializing pmd_vhost for %s\n", name); + name = rte_vdev_device_name(dev); + VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name); /* find an ethdev entry */ eth_dev = rte_eth_dev_allocated(name); if (eth_dev == NULL) return -ENODEV; - internal = eth_dev->data->dev_private; - if (internal == NULL) - return -ENODEV; - - list = find_internal_resource(internal->iface_name); - if (list == NULL) - return -ENODEV; - - pthread_mutex_lock(&internal_list_lock); - TAILQ_REMOVE(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - rte_free(list); + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return rte_eth_dev_release_port(eth_dev); - eth_dev_stop(eth_dev); + eth_dev_close(eth_dev); rte_free(vring_states[eth_dev->data->port_id]); vring_states[eth_dev->data->port_id] = NULL; - free(internal->dev_name); - free(internal->iface_name); - - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) - rte_free(eth_dev->data->rx_queues[i]); - for (i = 0; i < eth_dev->data->nb_tx_queues; i++) - rte_free(eth_dev->data->tx_queues[i]); - - rte_free(eth_dev->data->mac_addrs); - rte_free(eth_dev->data); - rte_free(internal); - rte_eth_dev_release_port(eth_dev); return 0; @@ -1230,7 +1462,19 @@ static struct rte_vdev_driver pmd_vhost_drv = { .remove = rte_pmd_vhost_remove, }; -DRIVER_REGISTER_VDEV(net_vhost, pmd_vhost_drv); -DRIVER_REGISTER_PARAM_STRING(net_vhost, +RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv); +RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost); +RTE_PMD_REGISTER_PARAM_STRING(net_vhost, "iface= " - "queues="); + "queues= " + "client=<0|1> " + "dequeue-zero-copy=<0|1> " + "iommu-support=<0|1> " + "postcopy-support=<0|1>"); + +RTE_INIT(vhost_init_log) +{ + vhost_logtype = rte_log_register("pmd.net.vhost"); + if (vhost_logtype >= 0) + rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE); +}
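
A minimal usage sketch (not part of the diff above): the hunks change rte_eth_vhost_get_queue_event() to take a uint16_t port id and make the PMD raise RTE_ETH_EVENT_QUEUE_STATE with a NULL parameter, so an application would typically drain the queued vring-state events from an ethdev event callback. The callback name below is hypothetical, and the queue_id/rx/enable fields of struct rte_eth_vhost_queue_event are assumed from the public rte_eth_vhost.h header of this period; treat the snippet as illustrative only, not as code from this patch.

#include <stdio.h>
#include <rte_ethdev.h>
#include "rte_eth_vhost.h"

/* Hypothetical application-side callback: drain all pending vring
 * state changes whenever the vhost PMD signals RTE_ETH_EVENT_QUEUE_STATE.
 */
static int
vhost_queue_state_cb(uint16_t port_id, enum rte_eth_event_type type,
		void *param __rte_unused, void *ret_param __rte_unused)
{
	struct rte_eth_vhost_queue_event ev;

	if (type != RTE_ETH_EVENT_QUEUE_STATE)
		return 0;

	/* rte_eth_vhost_get_queue_event() returns 0 while an event is
	 * pending and a negative value once the queue is drained.
	 */
	while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
		printf("port %u: %s queue %u %s\n", port_id,
		       ev.rx ? "rx" : "tx", ev.queue_id,
		       ev.enable ? "enabled" : "disabled");

	return 0;
}

/* Registered once per vhost port, e.g. after rte_eth_dev_configure():
 *
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_QUEUE_STATE,
 *			vhost_queue_state_cb, NULL);
 */

The same pattern works regardless of which of the new devargs (client, dequeue-zero-copy, iommu-support, postcopy-support) the port was created with, since the event path only depends on the vring_state_changed callback registered by this driver.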