X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_vhost%2Frte_vhost.h;h=c7b619ae0d29d03826cf56a03127047a623d2492;hb=966f89d998a20eddb45666f06dd42a3c3fc94574;hp=22d0db23db85ca246a90349b62149afc85309e3a;hpb=c0583d98a9153549383d236e5cc5464bbbe8dd89;p=dpdk.git diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 22d0db23db..c7b619ae0d 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -1,34 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation */ #ifndef _RTE_VHOST_H_ @@ -39,6 +10,7 @@ * Interface to vhost-user */ +#include #include #include @@ -52,10 +24,67 @@ extern "C" { /* These are not C++-aware. */ #include #include +#include #define RTE_VHOST_USER_CLIENT (1ULL << 0) #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) +#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) +#define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4) +/* support mbuf with external buffer attached */ +#define RTE_VHOST_USER_EXTBUF_SUPPORT (1ULL << 5) +/* support only linear buffers (no chained mbufs) */ +#define RTE_VHOST_USER_LINEARBUF_SUPPORT (1ULL << 6) + +/** Protocol features. 
*/ +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_RARP +#define VHOST_USER_PROTOCOL_F_RARP 2 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_NET_MTU +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION +#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT +#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD +#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER +#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 +#endif + +/** Indicate whether protocol features negotiation is supported. */ +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif /** * Information relating to memory regions including offsets to @@ -79,17 +108,130 @@ struct rte_vhost_memory { struct rte_vhost_mem_region regions[]; }; +struct rte_vhost_inflight_desc_split { + uint8_t inflight; + uint8_t padding[5]; + uint16_t next; + uint64_t counter; +}; + +struct rte_vhost_inflight_info_split { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t last_inflight_io; + uint16_t used_idx; + struct rte_vhost_inflight_desc_split desc[0]; +}; + +struct rte_vhost_inflight_desc_packed { + uint8_t inflight; + uint8_t padding; + uint16_t next; + uint16_t last; + uint16_t num; + uint64_t counter; + uint16_t id; + uint16_t flags; + uint32_t len; + uint64_t addr; +}; + +struct rte_vhost_inflight_info_packed { + uint64_t features; + uint16_t version; + uint16_t desc_num; + uint16_t free_head; + uint16_t old_free_head; + uint16_t used_idx; + uint16_t old_used_idx; + uint8_t used_wrap_counter; + uint8_t old_used_wrap_counter; + uint8_t padding[7]; + struct rte_vhost_inflight_desc_packed desc[0]; +}; + +struct rte_vhost_resubmit_desc { + uint16_t index; + uint64_t counter; +}; + +struct rte_vhost_resubmit_info { + struct rte_vhost_resubmit_desc *resubmit_list; + uint16_t resubmit_num; +}; + +struct rte_vhost_ring_inflight { + union { + struct rte_vhost_inflight_info_split *inflight_split; + struct rte_vhost_inflight_info_packed *inflight_packed; + }; + + struct rte_vhost_resubmit_info *resubmit_inflight; +}; + struct rte_vhost_vring { - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; + union { + struct vring_desc *desc; + struct vring_packed_desc *desc_packed; + }; + union { + struct vring_avail *avail; + struct vring_packed_desc_event *driver_event; + }; + union { + struct vring_used *used; + struct vring_packed_desc_event *device_event; + }; uint64_t log_guest_addr; + /** Deprecated, use rte_vhost_vring_call() instead. 
*/
 	int callfd;
+	int kickfd;
 	uint16_t size;
 };
 
+/**
+ * Possible results of the vhost user message handling callbacks
+ */
+enum rte_vhost_msg_result {
+	/* Message handling failed */
+	RTE_VHOST_MSG_RESULT_ERR = -1,
+	/* Message handling successful */
+	RTE_VHOST_MSG_RESULT_OK = 0,
+	/* Message handling successful and reply prepared */
+	RTE_VHOST_MSG_RESULT_REPLY = 1,
+	/* Message not handled */
+	RTE_VHOST_MSG_RESULT_NOT_HANDLED,
+};
+
+/**
+ * Function prototype for the vhost backend to handle specific vhost user
+ * messages.
+ *
+ * @param vid
+ *  vhost device id
+ * @param msg
+ *  Message pointer.
+ * @return
+ *  RTE_VHOST_MSG_RESULT_OK on success,
+ *  RTE_VHOST_MSG_RESULT_REPLY on success with reply,
+ *  RTE_VHOST_MSG_RESULT_ERR on failure,
+ *  RTE_VHOST_MSG_RESULT_NOT_HANDLED if message was not handled.
+ */
+typedef enum rte_vhost_msg_result (*rte_vhost_msg_handle)(int vid, void *msg);
+
+/**
+ * Optional vhost user message handlers.
+ */
+struct rte_vhost_user_extern_ops {
+	/* Called prior to the master message handling. */
+	rte_vhost_msg_handle pre_msg_handle;
+	/* Called after the master message handling. */
+	rte_vhost_msg_handle post_msg_handle;
+};
+
 /**
  * Device and vring operations.
  */
@@ -107,12 +249,28 @@ struct vhost_device_ops {
 	 */
 	int (*features_changed)(int vid, uint64_t features);
 
-	void *reserved[4]; /**< Reserved for future extension */
+	int (*new_connection)(int vid);
+	void (*destroy_connection)(int vid);
+
+	/**
+	 * This callback gets called each time a guest gets notified
+	 * about waiting packets. This is the interrupt handling through
+	 * the eventfd_write(callfd), which can be used for counting these
+	 * "slow" syscalls.
+	 */
+	void (*guest_notified)(int vid);
+
+	void *reserved[1]; /**< Reserved for future extension */
 };
 
 /**
  * Convert guest physical address to host virtual address
  *
+ * This function is deprecated because it is unsafe.
+ * The new rte_vhost_va_from_guest_pa() should be used instead to ensure
+ * guest physical ranges are fully and contiguously mapped into
+ * the process virtual address space.
+ *
  * @param mem
  *  the guest memory regions
  * @param gpa
@@ -120,6 +278,7 @@ struct vhost_device_ops {
 * @return
 *  the host virtual address on success, 0 on failure
 */
+__rte_deprecated
 static __rte_always_inline uint64_t
 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
 {
@@ -138,6 +297,47 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
 	return 0;
 }
 
+/**
+ * Convert guest physical address to host virtual address safely
+ *
+ * This variant of rte_vhost_gpa_to_vva() ensures that the full
+ * requested length is mapped and contiguous in the process address
+ * space. 
+ * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @param len + * the size of the requested area to map, updated with actual size mapped + * @return + * the host virtual address on success, 0 on failure + */ +__rte_experimental +static __rte_always_inline uint64_t +rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, + uint64_t gpa, uint64_t *len) +{ + struct rte_vhost_mem_region *r; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + r = &mem->regions[i]; + if (gpa >= r->guest_phys_addr && + gpa < r->guest_phys_addr + r->size) { + + if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) + *len = r->guest_phys_addr + r->size - gpa; + + return gpa - r->guest_phys_addr + + r->host_user_addr; + } + } + *len = 0; + + return 0; +} + #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) /** @@ -154,7 +354,7 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) * @param vid * vhost device ID * @param addr - * the starting address for write + * the starting address for write (in guest physical address space) * @param len * the length to write */ @@ -192,6 +392,44 @@ int rte_vhost_driver_register(const char *path, uint64_t flags); /* Unregister vhost driver. This is only meaningful to vhost user. */ int rte_vhost_driver_unregister(const char *path); +/** + * Set the vdpa device id, enforce single connection per socket + * + * @param path + * The vhost-user socket file path + * @param did + * Device id + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_attach_vdpa_device(const char *path, int did); + +/** + * Unset the vdpa device id + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_detach_vdpa_device(const char *path); + +/** + * Get the device id + * + * @param path + * The vhost-user socket file path + * @return + * Device id, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_get_vdpa_device_id(const char *path); + /** * Set the feature bits the vhost-user driver supports. * @@ -247,6 +485,50 @@ int rte_vhost_driver_disable_features(const char *path, uint64_t features); */ int rte_vhost_driver_get_features(const char *path, uint64_t *features); +/** + * Set the protocol feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param protocol_features + * Supported protocol features + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_set_protocol_features(const char *path, + uint64_t protocol_features); + +/** + * Get the protocol feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param protocol_features + * A pointer to store the queried protocol feature bits + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features); + +/** + * Get the queue number bits before feature negotiation. 
+ * + * @param path + * The vhost-user socket file path + * @param queue_num + * A pointer to store the queried queue number bits + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num); + /** * Get the feature bits after negotiation * @@ -356,7 +638,7 @@ int rte_vhost_get_ifname(int vid, char *buf, size_t len); * virtio queue index * * @return - * num of avail entires left + * num of avail entries left */ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); @@ -365,7 +647,7 @@ struct rte_mempool; /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtual device. A packet - * count is returned to indicate the number of packets that were succesfully + * count is returned to indicate the number of packets that were successfully * added to the RX queue. * @param vid * vhost device ID @@ -404,7 +686,7 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, /** * Get guest mem table: a list of memory regions. * - * An rte_vhost_vhost_memory object will be allocated internaly, to hold the + * An rte_vhost_vhost_memory object will be allocated internally, to hold the * guest memory regions. Application should free it at destroy_device() * callback. * @@ -432,6 +714,284 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct rte_vhost_vring *vring); +/** + * Get guest inflight vring info, including inflight ring and resubmit list. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param vring + * the structure to hold the requested inflight vring info + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx, + struct rte_vhost_ring_inflight *vring); + +/** + * Set split inflight descriptor. + * + * This function save descriptors that has been comsumed in available + * ring + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t idx); + +/** + * Set packed inflight descriptor and get corresponding inflight entry + * + * This function save descriptors that has been comsumed + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param head + * head of descriptors + * @param last + * last of descriptors + * @param inflight_entry + * corresponding inflight entry + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx, + uint16_t head, uint16_t last, uint16_t *inflight_entry); + +/** + * Save the head of list that the last batch of used descriptors. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param idx + * descriptor entry index + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_set_last_inflight_io_split(int vid, + uint16_t vring_idx, uint16_t idx); + +/** + * Update the inflight free_head, used_idx and used_wrap_counter. 
+ * + * This function will update status first before updating descriptors + * to used + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param head + * head of descriptors + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_set_last_inflight_io_packed(int vid, + uint16_t vring_idx, uint16_t head); + +/** + * Clear the split inflight status. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param last_used_idx + * last used idx of used ring + * @param idx + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, + uint16_t last_used_idx, uint16_t idx); + +/** + * Clear the packed inflight status. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @param head + * inflight entry index + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx, + uint16_t head); + +/** + * Notify the guest that used descriptors have been added to the vring. This + * function acts as a memory barrier. + * + * @param vid + * vhost device ID + * @param vring_idx + * vring index + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_vring_call(int vid, uint16_t vring_idx); + +/** + * Get vhost RX queue avail count. + * + * @param vid + * vhost device ID + * @param qid + * virtio queue index in mq case + * @return + * num of desc available + */ +uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid); + +/** + * Get log base and log size of the vhost device + * + * @param vid + * vhost device ID + * @param log_base + * vhost log base + * @param log_size + * vhost log size + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size); + +/** + * Get last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * vhost last_avail_idx to get + * @param last_used_idx + * vhost last_used_idx to get + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx); + +/** + * Get last_avail/last_used of the vhost virtqueue + * + * This function is designed for the reconnection and it's specific for + * the packed ring as we can get the two parameters from the inflight + * queueregion + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * vhost last_avail_idx to get + * @param last_used_idx + * vhost last_used_idx to get + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_get_vring_base_from_inflight(int vid, + uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx); + +/** + * Set last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * last_avail_idx to set + * @param last_used_idx + * last_used_idx to set + * @return + * 0 on success, -1 on failure + */ +__rte_experimental +int +rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx); + +/** + * Register external message handling callbacks + * + * @param vid + * vhost device ID + * @param ops + * virtio external callbacks to 
register
+ * @param ctx
+ *  additional context passed to the callbacks
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_extern_callback_register(int vid,
+	struct rte_vhost_user_extern_ops const * const ops, void *ctx);
+
+/**
+ * Get the vdpa device id for a vhost device.
+ *
+ * @param vid
+ *  vhost device id
+ * @return
+ *  device id
+ */
+__rte_experimental
+int
+rte_vhost_get_vdpa_device_id(int vid);
+
+/**
+ * Notify the guest that it should get the virtio configuration space
+ * from the backend.
+ *
+ * @param vid
+ *  vhost device ID
+ * @param need_reply
+ *  wait for the master to respond with the status of this operation
+ * @return
+ *  0 on success, < 0 on failure
+ */
+__rte_experimental
+int
+rte_vhost_slave_config_change(int vid, bool need_reply);
+
 #ifdef __cplusplus
 }
 #endif
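
Usage sketch (illustrative, not part of the header): wiring up a vhost-user socket with the new registration flags and the extended vhost_device_ops. rte_vhost_driver_callback_register() and rte_vhost_driver_start() are declared elsewhere in this header; the callback bodies and socket path handling are placeholders.

#include <stdio.h>
#include <rte_vhost.h>

static int
new_device(int vid)
{
	printf("vhost device %d is ready\n", vid);
	return 0;
}

static void
destroy_device(int vid)
{
	printf("vhost device %d removed\n", vid);
}

static void
guest_notified(int vid)
{
	/* Count the "slow" eventfd_write() kicks per device here. */
	(void)vid;
}

static const struct vhost_device_ops ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.guest_notified = guest_notified,
};

int
setup_vhost_socket(const char *path)
{
	uint64_t flags = RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_IOMMU_SUPPORT;

	if (rte_vhost_driver_register(path, flags) != 0)
		return -1;
	if (rte_vhost_driver_callback_register(path, &ops) != 0)
		return -1;
	return rte_vhost_driver_start(path);
}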
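
Protocol features are adjusted per socket after rte_vhost_driver_register() and before rte_vhost_driver_start(). A sketch that masks out postcopy support using the getters/setters added above:

#include <rte_vhost.h>

int
disable_postcopy(const char *path)
{
	uint64_t protocol_features;

	if (rte_vhost_driver_get_protocol_features(path, &protocol_features) != 0)
		return -1;

	/* Drop postcopy live-migration support for this socket. */
	protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);

	return rte_vhost_driver_set_protocol_features(path, protocol_features);
}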
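
The enqueue/dequeue burst pair documented above is the data path. A minimal forwarding iteration for a single-queue device, assuming the usual virtio numbering (queue 0 is the guest RX ring, queue 1 the guest TX ring):

#include <rte_mbuf.h>
#include <rte_vhost.h>

#define MAX_PKT_BURST 32

void
forward_one_iteration(int vid, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nb_rx, nb_tx, i;

	/* Packets the guest has transmitted (its TX queue, index 1). */
	nb_rx = rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts, MAX_PKT_BURST);

	/* Hand them straight back to the guest RX queue (index 0). */
	nb_tx = rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);

	/* Free whatever the guest could not accept. */
	for (i = nb_tx; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]);
}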
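
With callfd deprecated for direct use, guest notification goes through rte_vhost_vring_call(). A sketch that also fetches the ring metadata first:

#include <rte_vhost.h>

int
notify_guest(int vid, uint16_t vring_idx)
{
	struct rte_vhost_vring vring;

	if (rte_vhost_get_vhost_vring(vid, vring_idx, &vring) != 0)
		return -1;

	/*
	 * vring.size and vring.kickfd are now usable; whether desc or
	 * desc_packed is valid depends on the negotiated ring layout.
	 */

	return rte_vhost_vring_call(vid, vring_idx);
}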
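
Because rte_vhost_va_from_guest_pa() may shrink *len when a guest physical range crosses a region boundary, callers are expected to loop. A sketch of a chunked copy built on that contract (the experimental APIs need ALLOW_EXPERIMENTAL_API to compile):

#include <stdint.h>
#include <string.h>
#include <rte_vhost.h>

int
copy_from_guest(struct rte_vhost_memory *mem, void *dst,
		uint64_t gpa, uint64_t len)
{
	while (len > 0) {
		uint64_t chunk = len;
		uint64_t vva = rte_vhost_va_from_guest_pa(mem, gpa, &chunk);

		if (vva == 0 || chunk == 0)
			return -1; /* unmapped guest address */

		memcpy(dst, (const void *)(uintptr_t)vva, chunk);
		dst = (char *)dst + chunk;
		gpa += chunk;
		len -= chunk;
	}
	return 0;
}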
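
Dirty page logging only matters once the guest has negotiated VHOST_F_LOG_ALL, which is what RTE_VHOST_NEED_LOG() checks. A sketch combining it with rte_vhost_get_negotiated_features(), declared elsewhere in this header; the address passed to rte_vhost_log_write() is a guest physical address:

#include <rte_vhost.h>

void
log_guest_write(int vid, uint64_t gpa, uint64_t len)
{
	uint64_t features;

	if (rte_vhost_get_negotiated_features(vid, &features) != 0)
		return;

	if (RTE_VHOST_NEED_LOG(features))
		rte_vhost_log_write(vid, gpa, len);
}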
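
The split-ring inflight helpers are meant to bracket each request so it can be resubmitted after a crash or reconnection. The ordering below is inferred from the per-function comments above, not from the library sources, so treat it as a sketch:

#include <rte_vhost.h>

int
process_one_request(int vid, uint16_t vring_idx,
		uint16_t desc_idx, uint16_t last_used_idx)
{
	/* 1. Descriptor fetched from the avail ring: mark it inflight. */
	if (rte_vhost_set_inflight_desc_split(vid, vring_idx, desc_idx) != 0)
		return -1;

	/* ... perform the actual I/O on the descriptor chain here ... */

	/* 2. About to post it to the used ring: record it as the last I/O. */
	if (rte_vhost_set_last_inflight_io_split(vid, vring_idx, desc_idx) != 0)
		return -1;

	/* ... write the used ring entry and update the used index here ... */

	/* 3. Used index updated: the entry is no longer inflight. */
	return rte_vhost_clr_inflight_desc_split(vid, vring_idx,
			last_used_idx, desc_idx);
}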
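
The external message hooks: a pre handler that only observes traffic should return RTE_VHOST_MSG_RESULT_NOT_HANDLED so the library's own processing still runs. A sketch:

#include <rte_vhost.h>

static enum rte_vhost_msg_result
pre_msg_handle(int vid, void *msg)
{
	/* Peek at the raw vhost-user message before the library handles it. */
	(void)vid;
	(void)msg;
	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}

static struct rte_vhost_user_extern_ops extern_ops = {
	.pre_msg_handle = pre_msg_handle,
	/* post_msg_handle left NULL: no post-processing needed. */
};

int
hook_messages(int vid, void *ctx)
{
	return rte_vhost_extern_callback_register(vid, &extern_ops, ctx);
}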