X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fvhost%2Frte_eth_vhost.c;h=070f0e6dfd5387daef042787c56d73125fd027a5;hb=9fda31c3229ca6e036cae80392578ed6e5a51119;hp=cea2ead2dab650f934aa17234f3d9883bdcd8279;hpb=e045e858444349323cb02e0951cd977d4a0edd0d;p=dpdk.git

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index cea2ead2da..070f0e6dfd 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -5,10 +5,11 @@
 #include <unistd.h>
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/epoll.h>
 
 #include <rte_mbuf.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_vdev.h>
+#include <ethdev_driver.h>
+#include <ethdev_vdev.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_bus_vdev.h>
@@ -18,7 +19,7 @@
 
 #include "rte_eth_vhost.h"
 
-static int vhost_logtype;
+RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE);
 
 #define VHOST_LOG(level, ...) \
 	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
@@ -28,20 +29,22 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 #define ETH_VHOST_IFACE_ARG		"iface"
 #define ETH_VHOST_QUEUES_ARG		"queues"
 #define ETH_VHOST_CLIENT_ARG		"client"
-#define ETH_VHOST_DEQUEUE_ZERO_COPY	"dequeue-zero-copy"
 #define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
 #define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
 #define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso"
+#define ETH_VHOST_LINEAR_BUF		"linear-buffer"
+#define ETH_VHOST_EXT_BUF		"ext-buffer"
 
 #define VHOST_MAX_PKT_BURST 32
 
 static const char *valid_arguments[] = {
 	ETH_VHOST_IFACE_ARG,
 	ETH_VHOST_QUEUES_ARG,
 	ETH_VHOST_CLIENT_ARG,
-	ETH_VHOST_DEQUEUE_ZERO_COPY,
 	ETH_VHOST_IOMMU_SUPPORT,
 	ETH_VHOST_POSTCOPY_SUPPORT,
 	ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
+	ETH_VHOST_LINEAR_BUF,
+	ETH_VHOST_EXT_BUF,
 	NULL
 };
@@ -68,6 +71,9 @@ enum vhost_xstats_pkts {
 	VHOST_BROADCAST_PKT,
 	VHOST_MULTICAST_PKT,
 	VHOST_UNICAST_PKT,
+	VHOST_PKT,
+	VHOST_BYTE,
+	VHOST_MISSED_PKT,
 	VHOST_ERRORS_PKT,
 	VHOST_ERRORS_FRAGMENTED,
 	VHOST_ERRORS_JABBER,
@@ -91,12 +97,15 @@
 	uint16_t port;
 	uint16_t virtqueue_id;
 	struct vhost_stats stats;
+	int intr_enable;
+	rte_spinlock_t intr_lock;
 };
 
 struct pmd_internal {
 	rte_atomic32_t dev_attached;
-	char *dev_name;
 	char *iface_name;
+	uint64_t flags;
+	uint64_t disable_flags;
 	uint16_t max_queues;
 	int vid;
 	rte_atomic32_t started;
@@ -116,8 +125,8 @@ static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
 
 static struct rte_eth_link pmd_link = {
 		.link_speed = 10000,
-		.link_duplex = ETH_LINK_FULL_DUPLEX,
-		.link_status = ETH_LINK_DOWN
+		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
+		.link_status = RTE_ETH_LINK_DOWN
 };
 
 struct rte_vhost_vring_state {
@@ -141,11 +150,11 @@ struct vhost_xstats_name_off {
 /* [rx]_ is prepended to the name string here */
 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
 	{"good_packets",
-	 offsetof(struct vhost_queue, stats.pkts)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
 	{"total_bytes",
-	 offsetof(struct vhost_queue, stats.bytes)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
 	{"missed_pkts",
-	 offsetof(struct vhost_queue, stats.missed_pkts)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
 	{"broadcast_packets",
 	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
 	{"multicast_packets",
@@ -181,11 +190,11 @@ static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
 /* [tx]_ is prepended to the name string here */
 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
 	{"good_packets",
-	 offsetof(struct vhost_queue, stats.pkts)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
 	{"total_bytes",
-	 offsetof(struct vhost_queue, stats.bytes)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
 	{"missed_pkts",
-	 offsetof(struct vhost_queue, stats.missed_pkts)},
+	 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
 	{"broadcast_packets",
 	 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
 	{"multicast_packets",
@@ -279,23 +288,6 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 	if (n < nxstats)
 		return nxstats;
 
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		vq = dev->data->rx_queues[i];
-		if (!vq)
-			continue;
-		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
-				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
-				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
-	}
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		vq = dev->data->tx_queues[i];
-		if (!vq)
-			continue;
-		vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
-				+ vq->stats.missed_pkts
-				- (vq->stats.xstats[VHOST_BROADCAST_PKT]
-				+ vq->stats.xstats[VHOST_MULTICAST_PKT]);
-	}
 	for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
 		xstats[count].value = 0;
 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -326,7 +318,7 @@ vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 }
 
 static inline void
-vhost_count_multicast_broadcast(struct vhost_queue *vq,
+vhost_count_xcast_packets(struct vhost_queue *vq,
 				struct rte_mbuf *mbuf)
 {
 	struct rte_ether_addr *ea = NULL;
@@ -338,37 +330,35 @@ vhost_count_multicast_broadcast(struct vhost_queue *vq,
 			pstats->xstats[VHOST_BROADCAST_PKT]++;
 		else
 			pstats->xstats[VHOST_MULTICAST_PKT]++;
+	} else {
+		pstats->xstats[VHOST_UNICAST_PKT]++;
 	}
 }
 
-static void
-vhost_update_packet_xstats(struct vhost_queue *vq,
-			   struct rte_mbuf **bufs,
-			   uint16_t count)
+static __rte_always_inline void
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
 {
 	uint32_t pkt_len = 0;
-	uint64_t i = 0;
 	uint64_t index;
 	struct vhost_stats *pstats = &vq->stats;
 
-	for (i = 0; i < count ; i++) {
-		pkt_len = bufs[i]->pkt_len;
-		if (pkt_len == 64) {
-			pstats->xstats[VHOST_64_PKT]++;
-		} else if (pkt_len > 64 && pkt_len < 1024) {
-			index = (sizeof(pkt_len) * 8)
-				- __builtin_clz(pkt_len) - 5;
-			pstats->xstats[index]++;
-		} else {
-			if (pkt_len < 64)
-				pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-			else if (pkt_len <= 1522)
-				pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-			else if (pkt_len > 1522)
-				pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-		}
-		vhost_count_multicast_broadcast(vq, bufs[i]);
+	pstats->xstats[VHOST_PKT]++;
+	pkt_len = buf->pkt_len;
+	if (pkt_len == 64) {
+		pstats->xstats[VHOST_64_PKT]++;
+	} else if (pkt_len > 64 && pkt_len < 1024) {
+		index = (sizeof(pkt_len) * 8)
+			- __builtin_clz(pkt_len) - 5;
+		pstats->xstats[index]++;
+	} else {
+		if (pkt_len < 64)
+			pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+		else if (pkt_len <= 1522)
+			pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+		else if (pkt_len > 1522)
+			pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
 	}
+	vhost_count_xcast_packets(vq, buf);
 }
 
 static uint16_t
@@ -412,9 +402,10 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 			rte_vlan_strip(bufs[i]);
 
 		r->stats.bytes += bufs[i]->pkt_len;
-	}
+		r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
 
-	vhost_update_packet_xstats(r, bufs, nb_rx);
+		vhost_update_single_packet_xstats(r, bufs[i]);
+	}
 
 out:
 	rte_atomic32_set(&r->while_queuing, 0);
@@ -428,6 +419,8 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	struct vhost_queue *r = q;
 	uint16_t i, nb_tx = 0;
 	uint16_t nb_send = 0;
+	uint64_t nb_bytes = 0;
+	uint64_t nb_missed = 0;
 
 	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
 		return 0;
@@ -441,7 +434,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 		struct rte_mbuf *m = bufs[i];
 
 		/* Do VLAN tag insertion */
-		if (m->ol_flags & PKT_TX_VLAN_PKT) {
+		if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
 			int error = rte_vlan_insert(&m);
 			if (unlikely(error)) {
 				rte_pktmbuf_free(m);
@@ -468,20 +461,27 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 			break;
 	}
 
-	r->stats.pkts += nb_tx;
-	r->stats.missed_pkts += nb_bufs - nb_tx;
+	for (i = 0; likely(i < nb_tx); i++) {
+		nb_bytes += bufs[i]->pkt_len;
+		vhost_update_single_packet_xstats(r, bufs[i]);
+	}
 
-	for (i = 0; likely(i < nb_tx); i++)
-		r->stats.bytes += bufs[i]->pkt_len;
+	nb_missed = nb_bufs - nb_tx;
 
-	vhost_update_packet_xstats(r, bufs, nb_tx);
+	r->stats.pkts += nb_tx;
+	r->stats.bytes += nb_bytes;
+	r->stats.missed_pkts += nb_missed;
+
+	r->stats.xstats[VHOST_BYTE] += nb_bytes;
+	r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+	r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
 
-	/* According to RFC2863 page42 section ifHCOutMulticastPkts and
-	 * ifHCOutBroadcastPkts, the counters "multicast" and "broadcast"
-	 * are increased when packets are not transmitted successfully.
+	/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
+	 * ifHCOutBroadcastPkts counters are increased when packets are not
+	 * transmitted successfully.
 	 */
 	for (i = nb_tx; i < nb_bufs; i++)
-		vhost_count_multicast_broadcast(r, bufs[i]);
+		vhost_count_xcast_packets(r, bufs[i]);
 
 	for (i = 0; likely(i < nb_tx); i++)
 		rte_pktmbuf_free(bufs[i]);
@@ -491,17 +491,6 @@ out:
 	return nb_tx;
 }
 
-static int
-eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
-{
-	struct pmd_internal *internal = dev->data->dev_private;
-	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
-
-	internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
-
-	return 0;
-}
-
 static inline struct internal_list *
 find_internal_resource(char *ifname)
 {
@@ -530,12 +519,61 @@ find_internal_resource(char *ifname)
 	return list;
 }
 
+static int
+eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
+{
+	struct rte_intr_handle *handle = eth_dev->intr_handle;
+	struct rte_epoll_event rev, *elist;
+	int epfd, ret;
+
+	if (handle == NULL)
+		return 0;
+
+	elist = rte_intr_elist_index_get(handle, rxq_idx);
+	if (rte_intr_efds_index_get(handle, rxq_idx) == elist->fd)
+		return 0;
+
+	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
+			rxq_idx);
+
+	if (elist->fd != -1)
+		VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
+				elist->fd);
+
+	/*
+	 * First remove invalid epoll event, and then install
+	 * the new one. May be solved with a proper API in the
	 * future.
+	 */
+	epfd = elist->epfd;
+	rev = *elist;
+	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
+			elist);
+	if (ret) {
+		VHOST_LOG(ERR, "Delete epoll event failed.\n");
+		return ret;
+	}
+
+	rev.fd = rte_intr_efds_index_get(handle, rxq_idx);
+	if (rte_intr_elist_index_set(handle, rxq_idx, rev))
+		return -rte_errno;
+
+	elist = rte_intr_elist_index_get(handle, rxq_idx);
+	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, elist);
+	if (ret) {
+		VHOST_LOG(ERR, "Add epoll event failed.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static int
 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 {
 	struct rte_vhost_vring vring;
 	struct vhost_queue *vq;
-	int ret = 0;
+	int old_intr_enable, ret = 0;
 
 	vq = dev->data->rx_queues[qid];
 	if (!vq) {
@@ -543,6 +581,18 @@ eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
 		return -1;
 	}
 
+	rte_spinlock_lock(&vq->intr_lock);
+	old_intr_enable = vq->intr_enable;
+	vq->intr_enable = 1;
+	ret = eth_vhost_update_intr(dev, qid);
+	rte_spinlock_unlock(&vq->intr_lock);
+
+	if (ret < 0) {
+		VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
+		vq->intr_enable = old_intr_enable;
+		return ret;
+	}
+
 	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
 	if (ret < 0) {
 		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
@@ -577,6 +627,8 @@ eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
 	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
 	rte_wmb();
 
+	vq->intr_enable = 0;
+
 	return 0;
 }
 
@@ -585,12 +637,10 @@ eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
 {
 	struct rte_intr_handle *intr_handle = dev->intr_handle;
 
-	if (intr_handle) {
-		if (intr_handle->intr_vec)
-			free(intr_handle->intr_vec);
-		free(intr_handle);
+	if (intr_handle != NULL) {
+		rte_intr_vec_list_free(intr_handle);
+		rte_intr_instance_free(intr_handle);
 	}
-
 	dev->intr_handle = NULL;
 }
 
@@ -599,36 +649,36 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 {
 	struct rte_vhost_vring vring;
 	struct vhost_queue *vq;
-	int count = 0;
 	int nb_rxq = dev->data->nb_rx_queues;
 	int i;
 	int ret;
 
 	/* uninstall firstly if we are reconnecting */
-	if (dev->intr_handle)
+	if (dev->intr_handle != NULL)
 		eth_vhost_uninstall_intr(dev);
 
-	dev->intr_handle = malloc(sizeof(*dev->intr_handle));
-	if (!dev->intr_handle) {
+	dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
+	if (dev->intr_handle == NULL) {
 		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
 		return -ENOMEM;
 	}
-	memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));
-
-	dev->intr_handle->efd_counter_size = sizeof(uint64_t);
+	if (rte_intr_efd_counter_size_set(dev->intr_handle, sizeof(uint64_t)))
+		return -rte_errno;
 
-	dev->intr_handle->intr_vec =
-		malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));
-
-	if (!dev->intr_handle->intr_vec) {
+	if (rte_intr_vec_list_alloc(dev->intr_handle, NULL, nb_rxq)) {
 		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
-		free(dev->intr_handle);
+		rte_intr_instance_free(dev->intr_handle);
 		return -ENOMEM;
 	}
+
 	VHOST_LOG(INFO, "Prepare intr vec\n");
 	for (i = 0; i < nb_rxq; i++) {
+		if (rte_intr_vec_list_index_set(dev->intr_handle, i, RTE_INTR_VEC_RXTX_OFFSET + i))
+			return -rte_errno;
+		if (rte_intr_efds_index_set(dev->intr_handle, i, -1))
+			return -rte_errno;
 		vq = dev->data->rx_queues[i];
 		if (!vq) {
 			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
@@ -647,15 +697,20 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
 				"rxq-%d's kickfd is invalid, skip!\n", i);
 			continue;
 		}
-		dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
-		dev->intr_handle->efds[i] = vring.kickfd;
-		count++;
+
+		if (rte_intr_efds_index_set(dev->intr_handle, i, vring.kickfd))
+			continue;
 		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
 	}
 
-	dev->intr_handle->nb_efd = count;
-	dev->intr_handle->max_intr = count + 1;
-	dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
+	if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq))
+		return -rte_errno;
+
+	if (rte_intr_max_intr_set(dev->intr_handle, nb_rxq + 1))
+		return -rte_errno;
+
+	if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VDEV))
+		return -rte_errno;
 
 	return 0;
 }
@@ -769,14 +824,14 @@ new_device(int vid)
 	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);
 
-	eth_dev->data->dev_link.link_status = ETH_LINK_UP;
+	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
 
 	rte_atomic32_set(&internal->dev_attached, 1);
 	update_queuing_status(eth_dev);
 
 	VHOST_LOG(INFO, "Vhost device %d created\n", vid);
 
-	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 
 	return 0;
 }
@@ -804,7 +859,7 @@ destroy_device(int vid)
 	rte_atomic32_set(&internal->dev_attached, 0);
 	update_queuing_status(eth_dev);
 
-	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
+	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
 
 	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
 		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
@@ -833,7 +888,51 @@ destroy_device(int vid)
 	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
 	eth_vhost_uninstall_intr(eth_dev);
 
-	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
+}
+
+static int
+vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
+{
+	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
+	struct pmd_internal *internal = eth_dev->data->dev_private;
+	struct vhost_queue *vq;
+	struct rte_vhost_vring vring;
+	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
+	int ret = 0;
+
+	/*
+	 * The vring kickfd may be changed after the new device notification.
+	 * Update it when the vring state is updated.
+	 */
+	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
+	    rte_atomic32_read(&internal->dev_attached) &&
+	    rte_atomic32_read(&internal->started) &&
+	    dev_conf->intr_conf.rxq) {
+		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
+		if (ret) {
+			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
+					vring_id);
+			return ret;
+		}
+
+		if (rte_intr_efds_index_set(eth_dev->intr_handle, rx_idx,
+					    vring.kickfd))
+			return -rte_errno;
+
+		vq = eth_dev->data->rx_queues[rx_idx];
+		if (!vq) {
+			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
+			return -1;
+		}
+
+		rte_spinlock_lock(&vq->intr_lock);
+		if (vq->intr_enable)
+			ret = eth_vhost_update_intr(eth_dev, rx_idx);
+		rte_spinlock_unlock(&vq->intr_lock);
+	}
+
+	return ret;
 }
 
 static int
@@ -854,6 +953,11 @@ vring_state_changed(int vid, uint16_t vring, int enable)
 
 	eth_dev = list->eth_dev; /* won't be NULL */
 	state = vring_states[eth_dev->data->port_id];
+
+	if (enable && vring_conf_update(vid, eth_dev, vring))
+		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
+			  (int)vring);
+
 	rte_spinlock_lock(&state->lock);
 	if (state->cur[vring] == enable) {
 		rte_spinlock_unlock(&state->lock);
@@ -866,17 +970,85 @@ vring_state_changed(int vid, uint16_t vring, int enable)
 	VHOST_LOG(INFO, "vring%u is %s\n",
 			vring, enable ? "enabled" : "disabled");
 
-	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
+	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
 
 	return 0;
 }
 
-static struct vhost_device_ops vhost_ops = {
+static struct rte_vhost_device_ops vhost_ops = {
 	.new_device          = new_device,
 	.destroy_device      = destroy_device,
 	.vring_state_changed = vring_state_changed,
 };
 
+static int
+vhost_driver_setup(struct rte_eth_dev *eth_dev)
+{
+	struct pmd_internal *internal = eth_dev->data->dev_private;
+	struct internal_list *list = NULL;
+	struct rte_vhost_vring_state *vring_state = NULL;
+	unsigned int numa_node = eth_dev->device->numa_node;
+	const char *name = eth_dev->device->name;
+
+	/* Don't try to setup again if it has already been done. */
+	list = find_internal_resource(internal->iface_name);
+	if (list)
+		return 0;
+
+	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
+	if (list == NULL)
+		return -1;
+
+	vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
+					 0, numa_node);
+	if (vring_state == NULL)
+		goto free_list;
+
+	list->eth_dev = eth_dev;
+	pthread_mutex_lock(&internal_list_lock);
+	TAILQ_INSERT_TAIL(&internal_list, list, next);
+	pthread_mutex_unlock(&internal_list_lock);
+
+	rte_spinlock_init(&vring_state->lock);
+	vring_states[eth_dev->data->port_id] = vring_state;
+
+	if (rte_vhost_driver_register(internal->iface_name, internal->flags))
+		goto list_remove;
+
+	if (internal->disable_flags) {
+		if (rte_vhost_driver_disable_features(internal->iface_name,
+						      internal->disable_flags))
+			goto drv_unreg;
+	}
+
+	if (rte_vhost_driver_callback_register(internal->iface_name,
+					       &vhost_ops) < 0) {
+		VHOST_LOG(ERR, "Can't register callbacks\n");
+		goto drv_unreg;
+	}
+
+	if (rte_vhost_driver_start(internal->iface_name) < 0) {
+		VHOST_LOG(ERR, "Failed to start driver for %s\n",
+			  internal->iface_name);
+		goto drv_unreg;
+	}
+
+	return 0;
+
+drv_unreg:
+	rte_vhost_driver_unregister(internal->iface_name);
+list_remove:
+	vring_states[eth_dev->data->port_id] = NULL;
+	pthread_mutex_lock(&internal_list_lock);
+	TAILQ_REMOVE(&internal_list, list, next);
+	pthread_mutex_unlock(&internal_list_lock);
+	rte_free(vring_state);
free_list:
+	rte_free(list);
+
+	return -1;
+}
+
 int
 rte_eth_vhost_get_queue_event(uint16_t port_id,
 		struct rte_eth_vhost_queue_event *event)
@@ -943,6 +1115,24 @@ rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
 	return vid;
 }
 
+static int
+eth_dev_configure(struct rte_eth_dev *dev)
+{
+	struct pmd_internal *internal = dev->data->dev_private;
+	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+	/* NOTE: the same process has to operate a vhost interface
+	 * from beginning to end (from eth_dev configure to eth_dev close).
	 * It is user's responsibility at the moment.
+	 */
+	if (vhost_driver_setup(dev) < 0)
+		return -1;
+
+	internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
+
+	return 0;
+}
+
 static int
 eth_dev_start(struct rte_eth_dev *eth_dev)
 {
@@ -967,38 +1157,42 @@ eth_dev_start(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
-static void
+static int
 eth_dev_stop(struct rte_eth_dev *dev)
 {
 	struct pmd_internal *internal = dev->data->dev_private;
 
+	dev->data->dev_started = 0;
 	rte_atomic32_set(&internal->started, 0);
 	update_queuing_status(dev);
+
+	return 0;
 }
 
-static void
+static int
 eth_dev_close(struct rte_eth_dev *dev)
 {
 	struct pmd_internal *internal;
 	struct internal_list *list;
-	unsigned int i;
+	unsigned int i, ret;
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
 
 	internal = dev->data->dev_private;
 	if (!internal)
-		return;
-
-	eth_dev_stop(dev);
+		return 0;
 
-	rte_vhost_driver_unregister(internal->iface_name);
+	ret = eth_dev_stop(dev);
 
 	list = find_internal_resource(internal->iface_name);
-	if (!list)
-		return;
-
-	pthread_mutex_lock(&internal_list_lock);
-	TAILQ_REMOVE(&internal_list, list, next);
-	pthread_mutex_unlock(&internal_list_lock);
-	rte_free(list);
+	if (list) {
+		rte_vhost_driver_unregister(internal->iface_name);
+		pthread_mutex_lock(&internal_list_lock);
+		TAILQ_REMOVE(&internal_list, list, next);
+		pthread_mutex_unlock(&internal_list_lock);
+		rte_free(list);
+	}
 
 	if (dev->data->rx_queues)
 		for (i = 0; i < dev->data->nb_rx_queues; i++)
@@ -1008,7 +1202,6 @@ eth_dev_close(struct rte_eth_dev *dev)
 		for (i = 0; i < dev->data->nb_tx_queues; i++)
 			rte_free(dev->data->tx_queues[i]);
 
-	free(internal->dev_name);
 	rte_free(internal->iface_name);
 	rte_free(internal);
 
@@ -1016,6 +1209,8 @@ eth_dev_close(struct rte_eth_dev *dev)
 
 	rte_free(vring_states[dev->data->port_id]);
 	vring_states[dev->data->port_id] = NULL;
+
+	return ret;
 }
@@ -1036,6 +1231,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 
 	vq->mb_pool = mb_pool;
 	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
+	rte_spinlock_init(&vq->intr_lock);
 	dev->data->rx_queues[rx_queue_id] = vq;
 
 	return 0;
@@ -1057,6 +1253,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 	}
 
 	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
+	rte_spinlock_init(&vq->intr_lock);
 	dev->data->tx_queues[tx_queue_id] = vq;
 
 	return 0;
@@ -1080,9 +1277,9 @@ eth_dev_info(struct rte_eth_dev *dev,
 	dev_info->max_tx_queues = internal->max_queues;
 	dev_info->min_rx_bufsize = 0;
 
-	dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
-				DEV_TX_OFFLOAD_VLAN_INSERT;
-	dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
+	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
+				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
+	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
 
 	return 0;
 }
@@ -1153,9 +1350,15 @@ eth_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_queue_release(void *q)
+eth_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
+{
+	rte_free(dev->data->rx_queues[qid]);
+}
+
+static void
+eth_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
-	rte_free(q);
+	rte_free(dev->data->tx_queues[qid]);
 }
 
 static int
@@ -1176,17 +1379,56 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 }
 
 static uint32_t
-eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+eth_rx_queue_count(void *rx_queue)
 {
 	struct vhost_queue *vq;
 
-	vq = dev->data->rx_queues[rx_queue_id];
+	vq = rx_queue;
 	if (vq == NULL)
 		return 0;
 
 	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
 }
 
+#define CLB_VAL_IDX 0
+#define CLB_MSK_IDX 1
+#define CLB_MATCH_IDX 2
+static int
+vhost_monitor_callback(const uint64_t value,
+		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+	const uint64_t m = opaque[CLB_MSK_IDX];
+	const uint64_t v = opaque[CLB_VAL_IDX];
+	const uint64_t c = opaque[CLB_MATCH_IDX];
+
+	if (c)
+		return (value & m) == v ? -1 : 0;
+	else
+		return (value & m) == v ? 0 : -1;
+}
+
+static int
+vhost_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+	struct vhost_queue *vq = rx_queue;
+	struct rte_vhost_power_monitor_cond vhost_pmc;
+	int ret;
+	if (vq == NULL)
+		return -EINVAL;
+	ret = rte_vhost_get_monitor_addr(vq->vid, vq->virtqueue_id,
+			&vhost_pmc);
+	if (ret < 0)
+		return -EINVAL;
+	pmc->addr = vhost_pmc.addr;
+	pmc->opaque[CLB_VAL_IDX] = vhost_pmc.val;
+	pmc->opaque[CLB_MSK_IDX] = vhost_pmc.mask;
+	pmc->opaque[CLB_MATCH_IDX] = vhost_pmc.match;
+	pmc->size = vhost_pmc.size;
+	pmc->fn = vhost_monitor_callback;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ops = {
 	.dev_start = eth_dev_start,
 	.dev_stop = eth_dev_stop,
@@ -1195,10 +1437,9 @@ static const struct eth_dev_ops ops = {
 	.dev_infos_get = eth_dev_info,
 	.rx_queue_setup = eth_rx_queue_setup,
 	.tx_queue_setup = eth_tx_queue_setup,
-	.rx_queue_release = eth_queue_release,
-	.tx_queue_release = eth_queue_release,
+	.rx_queue_release = eth_rx_queue_release,
+	.tx_queue_release = eth_tx_queue_release,
 	.tx_done_cleanup = eth_tx_done_cleanup,
-	.rx_queue_count = eth_rx_queue_count,
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
@@ -1207,6 +1448,7 @@ static const struct eth_dev_ops ops = {
 	.xstats_get_names = vhost_dev_xstats_get_names,
 	.rx_queue_intr_enable = eth_rxq_intr_enable,
 	.rx_queue_intr_disable = eth_rxq_intr_disable,
+	.get_monitor_addr = vhost_get_monitor_addr,
 };
 
 static int
@@ -1219,16 +1461,10 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	struct pmd_internal *internal = NULL;
 	struct rte_eth_dev *eth_dev = NULL;
 	struct rte_ether_addr *eth_addr = NULL;
-	struct rte_vhost_vring_state *vring_state = NULL;
-	struct internal_list *list = NULL;
 
 	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
 		numa_node);
 
-	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
-	if (list == NULL)
-		goto error;
-
 	/* reserve an ethdev entry */
 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
 	if (eth_dev == NULL)
@@ -1242,78 +1478,44 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	*eth_addr = base_eth_addr;
 	eth_addr->addr_bytes[5] = eth_dev->data->port_id;
 
-	vring_state = rte_zmalloc_socket(name,
-			sizeof(*vring_state), 0, numa_node);
-	if (vring_state == NULL)
-		goto error;
-
 	/* now put it all together
 	 * - store queue data in internal,
 	 * - point eth_dev_data to internals
 	 * - and point eth_dev structure to new eth_dev_data structure
 	 */
 	internal = eth_dev->data->dev_private;
-	internal->dev_name = strdup(name);
-	if (internal->dev_name == NULL)
-		goto error;
 	internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
 						 0, numa_node);
 	if (internal->iface_name == NULL)
 		goto error;
 	strcpy(internal->iface_name, iface_name);
 
-	list->eth_dev = eth_dev;
-	pthread_mutex_lock(&internal_list_lock);
-	TAILQ_INSERT_TAIL(&internal_list, list, next);
-	pthread_mutex_unlock(&internal_list_lock);
-
-	rte_spinlock_init(&vring_state->lock);
-	vring_states[eth_dev->data->port_id] = vring_state;
-
 	data->nb_rx_queues = queues;
 	data->nb_tx_queues = queues;
 	internal->max_queues = queues;
 	internal->vid = -1;
+	internal->flags = flags;
+	internal->disable_flags = disable_flags;
 	data->dev_link = pmd_link;
-	data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE;
+	data->dev_flags = RTE_ETH_DEV_INTR_LSC |
+				RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
+	data->promiscuous = 1;
+	data->all_multicast = 1;
 
 	eth_dev->dev_ops = &ops;
+	eth_dev->rx_queue_count = eth_rx_queue_count;
 
 	/* finally assign rx and tx ops */
 	eth_dev->rx_pkt_burst = eth_vhost_rx;
 	eth_dev->tx_pkt_burst = eth_vhost_tx;
 
-	if (rte_vhost_driver_register(iface_name, flags))
-		goto error;
-
-	if (disable_flags) {
-		if (rte_vhost_driver_disable_features(iface_name,
-					disable_flags))
-			goto error;
-	}
-
-	if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) {
-		VHOST_LOG(ERR, "Can't register callbacks\n");
-		goto error;
-	}
-
-	if (rte_vhost_driver_start(iface_name) < 0) {
-		VHOST_LOG(ERR, "Failed to start driver for %s\n",
-			iface_name);
-		goto error;
-	}
-
 	rte_eth_dev_probing_finish(eth_dev);
 	return 0;
 
error:
-	if (internal) {
+	if (internal)
 		rte_free(internal->iface_name);
-		free(internal->dev_name);
-	}
-	rte_free(vring_state);
 	rte_eth_dev_release_port(eth_dev);
-	rte_free(list);
 
 	return -1;
 }
@@ -1356,10 +1558,11 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 	uint64_t flags = 0;
 	uint64_t disable_flags = 0;
 	int client_mode = 0;
-	int dequeue_zero_copy = 0;
 	int iommu_support = 0;
 	int postcopy_support = 0;
 	int tso = 0;
+	int linear_buf = 0;
+	int ext_buf = 0;
 	struct rte_eth_dev *eth_dev;
 	const char *name = rte_vdev_device_name(dev);
 
@@ -1371,8 +1574,11 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 			VHOST_LOG(ERR, "Failed to probe %s\n", name);
 			return -1;
 		}
-		/* TODO: request info from primary to set up Rx and Tx */
+		eth_dev->rx_pkt_burst = eth_vhost_rx;
+		eth_dev->tx_pkt_burst = eth_vhost_tx;
 		eth_dev->dev_ops = &ops;
+		if (dev->device.numa_node == SOCKET_ID_ANY)
+			dev->device.numa_node = rte_socket_id();
 		eth_dev->device = &dev->device;
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
@@ -1411,16 +1617,6 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 			flags |= RTE_VHOST_USER_CLIENT;
 	}
 
-	if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
-		ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
-					 &open_int, &dequeue_zero_copy);
-		if (ret < 0)
-			goto out_free;
-
-		if (dequeue_zero_copy)
-			flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
-	}
-
 	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
 		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
 					 &open_int, &iommu_support);
@@ -1454,6 +1650,28 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 		}
 	}
 
+	if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
+		ret = rte_kvargs_process(kvlist,
+				ETH_VHOST_LINEAR_BUF,
+				&open_int, &linear_buf);
+		if (ret < 0)
+			goto out_free;
+
+		if (linear_buf == 1)
+			flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
+		ret = rte_kvargs_process(kvlist,
+				ETH_VHOST_EXT_BUF,
+				&open_int, &ext_buf);
+		if (ret < 0)
+			goto out_free;
+
+		if (ext_buf == 1)
+			flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
+	}
+
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
@@ -1481,11 +1699,7 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
 	if (eth_dev == NULL)
 		return 0;
 
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-		return rte_eth_dev_release_port(eth_dev);
-
 	eth_dev_close(eth_dev);
-
 	rte_eth_dev_release_port(eth_dev);
 
 	return 0;
@@ -1502,14 +1716,8 @@ RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
 	"iface=<ifc_name> "
 	"queues=<int> "
 	"client=<0|1> "
-	"dequeue-zero-copy=<0|1> "
 	"iommu-support=<0|1> "
 	"postcopy-support=<0|1> "
-	"tso=<0|1>");
-
-RTE_INIT(vhost_init_log)
-{
-	vhost_logtype = rte_log_register("pmd.net.vhost");
-	if (vhost_logtype >= 0)
-		rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE);
-}
+	"tso=<0|1> "
+	"linear-buffer=<0|1> "
+	"ext-buffer=<0|1>");