* Interface to vhost-user
*/
+#include <stdbool.h>
#include <stdint.h>
#include <sys/eventfd.h>
/* These are not C++-aware. */
#include <linux/vhost.h>
#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
/* RTE_VHOST_USER_* flags: each macro below is a distinct single bit of a 64-bit mask. */
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
+/* NOTE(review): presumably enables postcopy support -- confirm against the users of this flag */
+#define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4)
+/* support mbuf with external buffer attached */
+#define RTE_VHOST_USER_EXTBUF_SUPPORT (1ULL << 5)
+/* support only linear buffers (no chained mbufs) */
+#define RTE_VHOST_USER_LINEARBUF_SUPPORT (1ULL << 6)
/** Protocol features. */
#ifndef VHOST_USER_PROTOCOL_F_MQ
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
#endif
+/*
+ * Fallback protocol feature bit numbers: each macro is defined here only
+ * when no definition of the same name is already in effect.
+ */
+#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
+#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
+#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
+#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
+#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+#endif
+
/** Indicate whether protocol features negotiation is supported. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
struct rte_vhost_mem_region regions[];
};
+/**
+ * One entry of the descriptor array that trails
+ * struct rte_vhost_inflight_info_split (its desc member).
+ *
+ * With natural alignment the explicit padding places next at byte offset 6
+ * and counter at byte offset 8, giving a 16-byte entry without implicit
+ * holes.
+ */
+struct rte_vhost_inflight_desc_split {
+ uint8_t inflight;
+ uint8_t padding[5]; /* explicit filler between inflight and next */
+ uint16_t next;
+ uint64_t counter;
+};
+
+/**
+ * Header of the split-ring inflight area; the descriptor entries follow it
+ * directly in memory through the trailing desc array.
+ *
+ * The four 16-bit members after features fill the header to 16 bytes, so
+ * with natural alignment desc starts at byte offset 16.
+ *
+ * NOTE(review): desc presumably holds desc_num entries -- confirm against
+ * the code that sets up this area.
+ */
+struct rte_vhost_inflight_info_split {
+	uint64_t features;
+	uint16_t version;
+	uint16_t desc_num;
+	uint16_t last_inflight_io;
+	uint16_t used_idx;
+	/*
+	 * C99 flexible array member (same form as regions[] in
+	 * struct rte_vhost_memory) instead of a GNU zero-length array;
+	 * the size and member offsets of the struct are unchanged.
+	 */
+	struct rte_vhost_inflight_desc_split desc[];
+};
+
+/**
+ * One entry of the descriptor array that trails
+ * struct rte_vhost_inflight_info_packed (its desc member).
+ *
+ * With natural alignment the members pack without implicit holes:
+ * counter sits at byte offset 8, id at 16, len at 20 and addr at 24,
+ * giving a 32-byte entry.
+ */
+struct rte_vhost_inflight_desc_packed {
+ uint8_t inflight;
+ uint8_t padding; /* explicit filler so that next is 2-byte aligned */
+ uint16_t next;
+ uint16_t last;
+ uint16_t num;
+ uint64_t counter;
+ uint16_t id;
+ uint16_t flags;
+ uint32_t len;
+ uint64_t addr;
+};
+
+/**
+ * Header of the packed-ring inflight area; the descriptor entries follow it
+ * directly in memory through the trailing desc array.
+ *
+ * The named members plus padding[7] occupy bytes 0..28; with natural
+ * alignment desc starts at the next 8-byte boundary, byte offset 32.
+ *
+ * NOTE(review): desc presumably holds desc_num entries -- confirm against
+ * the code that sets up this area.
+ */
+struct rte_vhost_inflight_info_packed {
+	uint64_t features;
+	uint16_t version;
+	uint16_t desc_num;
+	uint16_t free_head;
+	uint16_t old_free_head;
+	uint16_t used_idx;
+	uint16_t old_used_idx;
+	uint8_t used_wrap_counter;
+	uint8_t old_used_wrap_counter;
+	uint8_t padding[7];
+	/*
+	 * C99 flexible array member (same form as regions[] in
+	 * struct rte_vhost_memory) instead of a GNU zero-length array;
+	 * the size and member offsets of the struct are unchanged.
+	 */
+	struct rte_vhost_inflight_desc_packed desc[];
+};
+
+/**
+ * Element type of the list referenced by
+ * struct rte_vhost_resubmit_info (its resubmit_list member): a 16-bit
+ * index paired with a 64-bit counter.
+ */
+struct rte_vhost_resubmit_desc {
+ uint16_t index;
+ uint64_t counter;
+};
+
+/**
+ * Resubmit list, referenced from struct rte_vhost_ring_inflight.
+ *
+ * NOTE(review): resubmit_num is presumably the number of entries that
+ * resubmit_list points to -- confirm against the code that fills it.
+ */
+struct rte_vhost_resubmit_info {
+ struct rte_vhost_resubmit_desc *resubmit_list;
+ uint16_t resubmit_num;
+};
+
+/**
+ * Inflight information of one vring: the inflight ring and the resubmit
+ * list. This is the structure filled by
+ * rte_vhost_get_vhost_ring_inflight().
+ */
+struct rte_vhost_ring_inflight {
+ /*
+ * Both pointers share the same storage, so only one view is meaningful
+ * at a time. NOTE(review): presumably selected by whether the ring is
+ * split or packed -- confirm.
+ */
+ union {
+ struct rte_vhost_inflight_info_split *inflight_split;
+ struct rte_vhost_inflight_info_packed *inflight_packed;
+ };
+
+ struct rte_vhost_resubmit_info *resubmit_inflight;
+};
+
struct rte_vhost_vring {
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+ union {
+ struct vring_desc *desc;
+ struct vring_packed_desc *desc_packed;
+ };
+ union {
+ struct vring_avail *avail;
+ struct vring_packed_desc_event *driver_event;
+ };
+ union {
+ struct vring_used *used;
+ struct vring_packed_desc_event *device_event;
+ };
uint64_t log_guest_addr;
/** Deprecated, use rte_vhost_vring_call() instead. */
uint16_t size;
};
+/**
+ * Result codes returned by the vhost user message handling callbacks.
+ */
+enum rte_vhost_msg_result {
+	RTE_VHOST_MSG_RESULT_ERR = -1,        /* message handling failed */
+	RTE_VHOST_MSG_RESULT_OK = 0,          /* message handling successful */
+	RTE_VHOST_MSG_RESULT_REPLY = 1,       /* successful and reply prepared */
+	RTE_VHOST_MSG_RESULT_NOT_HANDLED = 2, /* message not handled */
+};
+
+/**
+ * Function prototype for the vhost backend to handle specific vhost user
+ * messages. Handlers of this type are installed through
+ * struct rte_vhost_user_extern_ops.
+ *
+ * @param vid
+ * vhost device id
+ * @param msg
+ * Message pointer. It is passed as an untyped pointer; the concrete
+ * message type is not declared in this header.
+ * @return
+ * RTE_VHOST_MSG_RESULT_OK on success,
+ * RTE_VHOST_MSG_RESULT_REPLY on success with reply,
+ * RTE_VHOST_MSG_RESULT_ERR on failure,
+ * RTE_VHOST_MSG_RESULT_NOT_HANDLED if message was not handled.
+ */
+typedef enum rte_vhost_msg_result (*rte_vhost_msg_handle)(int vid, void *msg);
+
+/**
+ * Optional vhost user message handlers, registered for a device with
+ * rte_vhost_extern_callback_register().
+ */
+struct rte_vhost_user_extern_ops {
+ /* Called prior to the master message handling. */
+ rte_vhost_msg_handle pre_msg_handle;
+ /* Called after the master message handling. */
+ rte_vhost_msg_handle post_msg_handle;
+};
+
/**
* Device and vring operations.
*/
int (*new_connection)(int vid);
void (*destroy_connection)(int vid);
- void *reserved[2]; /**< Reserved for future extension */
+ /**
+ * This callback gets called each time a guest gets notified
+ * about waiting packets. This is the interrupt handling through
+ * the eventfd_write(callfd), which can be used for counting these
+ * "slow" syscalls.
+ */
+ void (*guest_notified)(int vid);
+
+ void *reserved[1]; /**< Reserved for future extension */
};
/**
* Convert guest physical address to host virtual address
*
+ * This function is deprecated because it is unsafe.
+ * The new rte_vhost_va_from_guest_pa() should be used instead to ensure
+ * guest physical ranges are fully and contiguously mapped into
+ * process virtual address space.
+ *
* @param mem
* the guest memory regions
* @param gpa
* @return
* the host virtual address on success, 0 on failure
*/
+__rte_deprecated
static __rte_always_inline uint64_t
rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
{
return 0;
}
+/**
+ * Convert guest physical address to host virtual address safely
+ *
+ * This variant of rte_vhost_gpa_to_vva() makes sure that the whole
+ * requested length is mapped and contiguous in process address space:
+ * when the region containing gpa ends before gpa + *len, *len is
+ * reduced to the number of bytes left in that region.
+ *
+ * @param mem
+ *  the guest memory regions
+ * @param gpa
+ *  the guest physical address for querying
+ * @param len
+ *  the size of the requested area to map, updated with actual size mapped;
+ *  set to 0 when no region contains gpa
+ * @return
+ *  the host virtual address on success, 0 on failure
+ */
+__rte_experimental
+static __rte_always_inline uint64_t
+rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
+	uint64_t gpa, uint64_t *len)
+{
+	struct rte_vhost_mem_region *r;
+	uint64_t offset;
+	uint32_t i;
+
+	for (i = 0; i < mem->nregions; i++) {
+		r = &mem->regions[i];
+		if (gpa < r->guest_phys_addr)
+			continue;
+
+		/*
+		 * Compare the offset with the region size rather than gpa
+		 * with the region end: guest_phys_addr + size wraps around
+		 * for a region that ends at the top of the address space,
+		 * which would make such a region never match.
+		 */
+		offset = gpa - r->guest_phys_addr;
+		if (offset >= r->size)
+			continue;
+
+		/* clamp the request to what is left of this region */
+		if (unlikely(*len > r->size - offset))
+			*len = r->size - offset;
+
+		return offset + r->host_user_addr;
+	}
+	*len = 0;
+
+	return 0;
+}
+
/** Evaluates to non-zero when the VHOST_F_LOG_ALL bit is set in the given feature mask. */
#define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
/**
* @param vid
* vhost device ID
* @param addr
- * the starting address for write
+ * the starting address for write (in guest physical address space)
* @param len
* the length to write
*/
/*
 * Unregister vhost driver. This is only meaningful to vhost user.
 *
 * NOTE(review): path is presumably the vhost-user socket file path, as for
 * the other rte_vhost_driver_*() declarations in this header -- confirm.
 */
int rte_vhost_driver_unregister(const char *path);
+/**
+ * Set the vdpa device id for a vhost-user socket. This also enforces a
+ * single connection per socket.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param did
+ * Device id
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_attach_vdpa_device(const char *path, int did);
+
+/**
+ * Unset the vdpa device id of a vhost-user socket
+ * (the counterpart of rte_vhost_driver_attach_vdpa_device()).
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_detach_vdpa_device(const char *path);
+
+/**
+ * Get the vdpa device id of a vhost-user socket
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @return
+ * Device id, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_device_id(const char *path);
+
/**
* Set the feature bits the vhost-user driver supports.
*
*/
int rte_vhost_driver_get_features(const char *path, uint64_t *features);
+/**
+ * Set the protocol feature bits of a vhost-user socket before feature
+ * negotiation.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param protocol_features
+ * Supported protocol features
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_set_protocol_features(const char *path,
+ uint64_t protocol_features);
+
+/**
+ * Get the protocol feature bits of a vhost-user socket before feature
+ * negotiation.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param protocol_features
+ * A pointer to store the queried protocol feature bits
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_protocol_features(const char *path,
+ uint64_t *protocol_features);
+
+/**
+ * Get the queue number of a vhost-user socket before feature negotiation.
+ *
+ * @param path
+ * The vhost-user socket file path
+ * @param queue_num
+ * A pointer to store the queried queue number
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
+
/**
* Get the feature bits after negotiation
*
* virtio queue index
*
* @return
- * num of avail entires left
+ * num of avail entries left
*/
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
/**
* Get guest mem table: a list of memory regions.
*
- * An rte_vhost_vhost_memory object will be allocated internaly, to hold the
+ * An rte_vhost_memory object will be allocated internally, to hold the
* guest memory regions. Application should free it at destroy_device()
* callback.
*
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
struct rte_vhost_vring *vring);
+/**
+ * Get the guest inflight vring info, including the inflight ring and the
+ * resubmit list.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param vring
+ * the structure to hold the requested inflight vring info
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
+ struct rte_vhost_ring_inflight *vring);
+
+/**
+ * Set split inflight descriptor.
+ *
+ * This function saves descriptors that have been consumed in the
+ * available ring.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param idx
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
+ uint16_t idx);
+
+/**
+ * Set packed inflight descriptor and get the corresponding inflight entry.
+ *
+ * This function saves descriptors that have been consumed.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * head of descriptors
+ * @param last
+ * last of descriptors
+ * @param inflight_entry
+ * pointer that receives the corresponding inflight entry
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
+ uint16_t head, uint16_t last, uint16_t *inflight_entry);
+
+/**
+ * Save the head of the list of the last batch of used descriptors.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param idx
+ * descriptor entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_set_last_inflight_io_split(int vid,
+ uint16_t vring_idx, uint16_t idx);
+
+/**
+ * Update the inflight free_head, used_idx and used_wrap_counter.
+ *
+ * This function updates the status first, before the descriptors are
+ * updated to used.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * head of descriptors
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_set_last_inflight_io_packed(int vid,
+ uint16_t vring_idx, uint16_t head);
+
+/**
+ * Clear the split inflight status of an entry.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param last_used_idx
+ * last used idx of the used ring
+ * @param idx
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
+ uint16_t last_used_idx, uint16_t idx);
+
+/**
+ * Clear the packed inflight status of an entry.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
+ uint16_t head);
+
/**
* Notify the guest that used descriptors have been added to the vring. This
* function acts as a memory barrier.
*/
uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
+/**
+ * Get the log base and the log size of the vhost device
+ *
+ * @param vid
+ * vhost device ID
+ * @param log_base
+ * pointer that receives the vhost log base
+ * @param log_size
+ * pointer that receives the vhost log size
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
+
+/**
+ * Get last_avail_idx and last_used_idx of the vhost virtqueue
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * pointer that receives the vhost last_avail_idx
+ * @param last_used_idx
+ * pointer that receives the vhost last_used_idx
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_get_vring_base(int vid, uint16_t queue_id,
+ uint16_t *last_avail_idx, uint16_t *last_used_idx);
+
+/**
+ * Get last_avail_idx and last_used_idx of the vhost virtqueue
+ *
+ * This function is designed for reconnection and is specific to the
+ * packed ring, as the two values can be obtained from the inflight
+ * queue region.
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * pointer that receives the vhost last_avail_idx
+ * @param last_used_idx
+ * pointer that receives the vhost last_used_idx
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_get_vring_base_from_inflight(int vid,
+ uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
+
+/**
+ * Set last_avail_idx and last_used_idx of the vhost virtqueue
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * last_avail_idx to set
+ * @param last_used_idx
+ * last_used_idx to set
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_set_vring_base(int vid, uint16_t queue_id,
+ uint16_t last_avail_idx, uint16_t last_used_idx);
+
+/**
+ * Register external message handling callbacks for a vhost device
+ *
+ * @param vid
+ * vhost device ID
+ * @param ops
+ * virtio external callbacks to register
+ * (see struct rte_vhost_user_extern_ops)
+ * @param ctx
+ * additional context passed to the callbacks
+ * @return
+ * 0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_extern_callback_register(int vid,
+ struct rte_vhost_user_extern_ops const * const ops, void *ctx);
+
+/**
+ * Get the vdpa device id of a vhost device.
+ *
+ * @param vid
+ * vhost device id
+ * @return
+ * device id
+ */
+__rte_experimental
+int
+rte_vhost_get_vdpa_device_id(int vid);
+
+/**
+ * Notify the guest that it should get the virtio configuration space from
+ * the backend.
+ *
+ * @param vid
+ * vhost device ID
+ * @param need_reply
+ * wait for the master to respond with the status of this operation
+ * @return
+ * 0 on success, < 0 on failure
+ */
+__rte_experimental
+int
+rte_vhost_slave_config_change(int vid, bool need_reply);
+
#ifdef __cplusplus
}
#endif