1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 * Interface to vhost-user
14 #include <sys/eventfd.h>
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
/*
 * Flag bits, each occupying a distinct bit of a 64-bit mask.
 * NOTE(review): presumably OR-ed together into the flags argument of
 * rte_vhost_driver_register() - confirm against the implementation.
 */
27 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
28 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
29 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
30 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
31 #define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4)
33 /** Protocol features. */
34 #ifndef VHOST_USER_PROTOCOL_F_MQ
35 #define VHOST_USER_PROTOCOL_F_MQ 0
38 #ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
39 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
42 #ifndef VHOST_USER_PROTOCOL_F_RARP
43 #define VHOST_USER_PROTOCOL_F_RARP 2
46 #ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
47 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
50 #ifndef VHOST_USER_PROTOCOL_F_NET_MTU
51 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
54 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
55 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
58 #ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
59 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
62 #ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
63 #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
66 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
67 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
70 #ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
71 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
74 /** Indicate whether protocol features negotiation is supported. */
75 #ifndef VHOST_USER_F_PROTOCOL_FEATURES
76 #define VHOST_USER_F_PROTOCOL_FEATURES 30
80 * Information relating to memory regions including offsets to
81 * addresses in QEMU's memory file.
83 struct rte_vhost_mem_region {
84 uint64_t guest_phys_addr;
85 uint64_t guest_user_addr;
86 uint64_t host_user_addr;
94 * Memory structure includes region and mapping information.
96 struct rte_vhost_memory {
98 struct rte_vhost_mem_region regions[];
101 struct rte_vhost_vring {
102 struct vring_desc *desc;
103 struct vring_avail *avail;
104 struct vring_used *used;
105 uint64_t log_guest_addr;
107 /** Deprecated, use rte_vhost_vring_call() instead. */
115 * Possible results of the vhost user message handling callbacks
117 enum rte_vhost_msg_result {
118 /* Message handling failed */
119 RTE_VHOST_MSG_RESULT_ERR = -1,
120 /* Message handling successful */
121 RTE_VHOST_MSG_RESULT_OK = 0,
122 /* Message handling successful and reply prepared */
123 RTE_VHOST_MSG_RESULT_REPLY = 1,
127 * Function prototype for the vhost backend to handle specific vhost user
128 * messages prior to the master message handling
135 * If the handler requires skipping the master message handling, this variable
136 * shall be written 1, otherwise 0.
138 * RTE_VHOST_MSG_RESULT_OK on success, RTE_VHOST_MSG_RESULT_REPLY on success
139 * with reply, RTE_VHOST_MSG_RESULT_ERR on failure
141 typedef enum rte_vhost_msg_result (*rte_vhost_msg_pre_handle)(int vid,
142 void *msg, uint32_t *skip_master);
145 * Function prototype for the vhost backend to handle specific vhost user
146 * messages after the master message handling is done
153 * RTE_VHOST_MSG_RESULT_OK on success, RTE_VHOST_MSG_RESULT_REPLY on success
154 * with reply, RTE_VHOST_MSG_RESULT_ERR on failure
156 typedef enum rte_vhost_msg_result (*rte_vhost_msg_post_handle)(int vid,
160 * Optional vhost user message handlers.
162 struct rte_vhost_user_extern_ops {
163 rte_vhost_msg_pre_handle pre_msg_handle;
164 rte_vhost_msg_post_handle post_msg_handle;
168 * Device and vring operations.
170 struct vhost_device_ops {
171 int (*new_device)(int vid); /**< Add device. */
172 void (*destroy_device)(int vid); /**< Remove device. */
174 int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
177 * Features could be changed after the feature negotiation.
178 * For example, VHOST_F_LOG_ALL will be set/cleared at the
179 * start/end of live migration, respectively. This callback
180 * is used to inform the application on such change.
182 int (*features_changed)(int vid, uint64_t features);
184 int (*new_connection)(int vid);
185 void (*destroy_connection)(int vid);
187 void *reserved[2]; /**< Reserved for future extension */
191 * Convert guest physical address to host virtual address
193 * This function is deprecated because it is unsafe.
194 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
195 * guest physical ranges are fully and contiguously mapped into
196 * process virtual address space.
199 * the guest memory regions
201 * the guest physical address for querying
203 * the host virtual address on success, 0 on failure
206 static __rte_always_inline uint64_t
207 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
209 struct rte_vhost_mem_region *reg;
212 for (i = 0; i < mem->nregions; i++) {
213 reg = &mem->regions[i];
214 if (gpa >= reg->guest_phys_addr &&
215 gpa < reg->guest_phys_addr + reg->size) {
216 return gpa - reg->guest_phys_addr +
225 * Convert guest physical address to host virtual address safely
227 * This variant of rte_vhost_gpa_to_vva() makes sure that all the
228 * requested length is mapped and contiguous in process address
232 * the guest memory regions
234 * the guest physical address for querying
236 * the size of the requested area to map, updated with actual size mapped
238 * the host virtual address on success, 0 on failure
240 static __rte_always_inline uint64_t
241 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
242 uint64_t gpa, uint64_t *len)
244 struct rte_vhost_mem_region *r;
247 for (i = 0; i < mem->nregions; i++) {
248 r = &mem->regions[i];
249 if (gpa >= r->guest_phys_addr &&
250 gpa < r->guest_phys_addr + r->size) {
252 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
253 *len = r->guest_phys_addr + r->size - gpa;
255 return gpa - r->guest_phys_addr +
264 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
267 * Log the memory write start with given address.
269 * This function only needs to be invoked when live migration starts.
270 * Therefore, most of the time it does not need to be called at all. To
271 * keep the performance impact to a minimum, it's suggested to do a
272 * check before calling it:
274 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
275 * rte_vhost_log_write(vid, addr, len);
280 * the starting address for write
282 * the length to write
284 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
287 * Log the used ring update start at given offset.
289 * Same as rte_vhost_log_write, it's suggested to do a check before
292 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
293 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
300 * the offset inside the used ring
302 * the length to write
304 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
305 uint64_t offset, uint64_t len);
/*
 * NOTE(review): no documentation accompanies this prototype. Judging by the
 * name and parameters it presumably switches guest notifications for queue
 * queue_id of device vid on or off according to enable; the return-value
 * convention is not visible here - confirm against the implementation.
 */
307 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
310 * Register vhost driver. path could be different for multiple
313 int rte_vhost_driver_register(const char *path, uint64_t flags);
315 /* Unregister vhost driver. This is only meaningful to vhost user. */
316 int rte_vhost_driver_unregister(const char *path);
319 * Set the vdpa device id, enforce single connection per socket
322 * The vhost-user socket file path
326 * 0 on success, -1 on failure
328 int __rte_experimental
329 rte_vhost_driver_attach_vdpa_device(const char *path, int did);
332 * Unset the vdpa device id
335 * The vhost-user socket file path
337 * 0 on success, -1 on failure
339 int __rte_experimental
340 rte_vhost_driver_detach_vdpa_device(const char *path);
346 * The vhost-user socket file path
348 * Device id, -1 on failure
350 int __rte_experimental
351 rte_vhost_driver_get_vdpa_device_id(const char *path);
354 * Set the feature bits the vhost-user driver supports.
357 * The vhost-user socket file path
361 * 0 on success, -1 on failure
363 int rte_vhost_driver_set_features(const char *path, uint64_t features);
366 * Enable vhost-user driver features.
369 * - the param features should be a subset of the feature bits provided
370 * by rte_vhost_driver_set_features().
371 * - it must be invoked before vhost-user negotiation starts.
374 * The vhost-user socket file path
378 * 0 on success, -1 on failure
380 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
383 * Disable vhost-user driver features.
385 * The two notes at rte_vhost_driver_enable_features() also apply here.
388 * The vhost-user socket file path
390 * Features to disable
392 * 0 on success, -1 on failure
394 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
397 * Get the feature bits before feature negotiation.
400 * The vhost-user socket file path
402 * A pointer to store the queried feature bits
404 * 0 on success, -1 on failure
406 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
409 * Set the protocol feature bits before feature negotiation.
412 * The vhost-user socket file path
413 * @param protocol_features
414 * Supported protocol features
416 * 0 on success, -1 on failure
418 int __rte_experimental
419 rte_vhost_driver_set_protocol_features(const char *path,
420 uint64_t protocol_features);
423 * Get the protocol feature bits before feature negotiation.
426 * The vhost-user socket file path
427 * @param protocol_features
428 * A pointer to store the queried protocol feature bits
430 * 0 on success, -1 on failure
432 int __rte_experimental
433 rte_vhost_driver_get_protocol_features(const char *path,
434 uint64_t *protocol_features);
437 * Get the queue number before feature negotiation.
440 * The vhost-user socket file path
442 * A pointer to store the queried queue number
444 * 0 on success, -1 on failure
446 int __rte_experimental
447 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
450 * Get the feature bits after negotiation
455 * A pointer to store the queried feature bits
457 * 0 on success, -1 on failure
459 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
461 /* Register callbacks. */
462 int rte_vhost_driver_callback_register(const char *path,
463 struct vhost_device_ops const * const ops);
467 * Start the vhost-user driver.
469 * This function triggers the vhost-user negotiation.
472 * The vhost-user socket file path
474 * 0 on success, -1 on failure
476 int rte_vhost_driver_start(const char *path);
479 * Get the MTU value of the device if set in QEMU.
482 * virtio-net device ID
484 * The variable to store the MTU value
488 * -EAGAIN: device not yet started
489 * -ENOTSUP: device does not support MTU feature
491 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
494 * Get the numa node from which the virtio net device's memory
501 * The numa node, -1 on failure
503 int rte_vhost_get_numa_node(int vid);
507 * Get the number of queues the device supports.
509 * Note this function is deprecated, as it returns a queue pair number,
510 * which is vhost specific. Instead, rte_vhost_get_vring_num should
517 * The number of queues, 0 on failure
520 uint32_t rte_vhost_get_queue_num(int vid);
523 * Get the number of vrings the device supports.
529 * The number of vrings, 0 on failure
531 uint16_t rte_vhost_get_vring_num(int vid);
534 * Get the virtio net device's ifname, which is the vhost-user socket
540 * The buffer to store the queried ifname
545 * 0 on success, -1 on failure
547 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
550 * Get how many avail entries are left in the queue
558 * num of avail entries left
560 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
565 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
566 * be received from the physical port or from another virtual device. A packet
567 * count is returned to indicate the number of packets that were successfully
568 * added to the RX queue.
572 * virtio queue index in mq case
574 * array to contain packets to be enqueued
576 * packets num to be enqueued
578 * num of packets enqueued
580 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
581 struct rte_mbuf **pkts, uint16_t count);
584 * This function gets guest buffers from the virtio device TX virtqueue,
585 * constructs host mbufs, copies guest buffer content to host mbufs and
586 * stores them in pkts to be processed.
590 * virtio queue index in mq case
592 * mbuf_pool where host mbuf is allocated.
594 * array to contain packets to be dequeued
596 * packets num to be dequeued
598 * num of packets dequeued
600 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
601 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
604 * Get guest mem table: a list of memory regions.
606 * An rte_vhost_memory object will be allocated internally, to hold the
607 * guest memory regions. Application should free it at destroy_device()
613 * To store the returned mem regions
615 * 0 on success, -1 on failure
617 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
620 * Get guest vring info, including the vring address, vring size, etc.
627 * the structure to hold the requested vring info
629 * 0 on success, -1 on failure
631 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
632 struct rte_vhost_vring *vring);
635 * Notify the guest that used descriptors have been added to the vring. This
636 * function acts as a memory barrier.
643 * 0 on success, -1 on failure
645 int rte_vhost_vring_call(int vid, uint16_t vring_idx);
648 * Get vhost RX queue avail count.
653 * virtio queue index in mq case
655 * num of desc available
657 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
660 * Get log base and log size of the vhost device
669 * 0 on success, -1 on failure
671 int __rte_experimental
672 rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
675 * Get last_avail/used_idx of the vhost virtqueue
681 * @param last_avail_idx
682 * vhost last_avail_idx to get
683 * @param last_used_idx
684 * vhost last_used_idx to get
686 * 0 on success, -1 on failure
688 int __rte_experimental
689 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
690 uint16_t *last_avail_idx, uint16_t *last_used_idx);
693 * Set last_avail/used_idx of the vhost virtqueue
699 * @param last_avail_idx
700 * last_avail_idx to set
701 * @param last_used_idx
702 * last_used_idx to set
704 * 0 on success, -1 on failure
706 int __rte_experimental
707 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
708 uint16_t last_avail_idx, uint16_t last_used_idx);
711 * Register external message handling callbacks
716 * virtio external callbacks to register
718 * additional context passed to the callbacks
720 * 0 on success, -1 on failure
722 int __rte_experimental
723 rte_vhost_extern_callback_register(int vid,
724 struct rte_vhost_user_extern_ops const * const ops, void *ctx);
727 * Get vdpa device id for vhost device.
734 int __rte_experimental
735 rte_vhost_get_vdpa_device_id(int vid);
741 #endif /* _RTE_VHOST_H_ */