1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 * Interface to vhost-user
14 #include <sys/eventfd.h>
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
26 #include <linux/virtio_net.h>
28 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
29 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
30 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
31 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
32 #define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4)
34 /** Protocol features. */
35 #ifndef VHOST_USER_PROTOCOL_F_MQ
36 #define VHOST_USER_PROTOCOL_F_MQ 0
39 #ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
40 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
43 #ifndef VHOST_USER_PROTOCOL_F_RARP
44 #define VHOST_USER_PROTOCOL_F_RARP 2
47 #ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
48 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
51 #ifndef VHOST_USER_PROTOCOL_F_NET_MTU
52 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
55 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
56 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
59 #ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
60 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
63 #ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
64 #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
67 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
68 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
71 #ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
72 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
75 /** Indicate whether protocol features negotiation is supported. */
76 #ifndef VHOST_USER_F_PROTOCOL_FEATURES
77 #define VHOST_USER_F_PROTOCOL_FEATURES 30
81 * Information relating to memory regions including offsets to
82 * addresses in QEMU's memory file.
84 struct rte_vhost_mem_region {
85 uint64_t guest_phys_addr;
86 uint64_t guest_user_addr;
87 uint64_t host_user_addr;
95 * Memory structure includes region and mapping information.
97 struct rte_vhost_memory {
99 struct rte_vhost_mem_region regions[];
102 struct rte_vhost_vring {
103 struct vring_desc *desc;
104 struct vring_avail *avail;
105 struct vring_used *used;
106 uint64_t log_guest_addr;
108 /** Deprecated, use rte_vhost_vring_call() instead. */
116 * Possible results of the vhost user message handling callbacks
118 enum rte_vhost_msg_result {
119 /* Message handling failed */
120 RTE_VHOST_MSG_RESULT_ERR = -1,
121 /* Message handling successful */
122 RTE_VHOST_MSG_RESULT_OK = 0,
123 /* Message handling successful and reply prepared */
124 RTE_VHOST_MSG_RESULT_REPLY = 1,
125 /* Message not handled */
126 RTE_VHOST_MSG_RESULT_NOT_HANDLED,
130 * Function prototype for the vhost backend to handle specific vhost user
138 * RTE_VHOST_MSG_RESULT_OK on success,
139 * RTE_VHOST_MSG_RESULT_REPLY on success with reply,
140 * RTE_VHOST_MSG_RESULT_ERR on failure,
141 * RTE_VHOST_MSG_RESULT_NOT_HANDLED if message was not handled.
143 typedef enum rte_vhost_msg_result (*rte_vhost_msg_handle)(int vid, void *msg);
146 * Optional vhost user message handlers.
148 struct rte_vhost_user_extern_ops {
149 /* Called prior to the master message handling. */
150 rte_vhost_msg_handle pre_msg_handle;
151 /* Called after the master message handling. */
152 rte_vhost_msg_handle post_msg_handle;
156 * Device and vring operations.
158 struct vhost_device_ops {
159 int (*new_device)(int vid); /**< Add device. */
160 void (*destroy_device)(int vid); /**< Remove device. */
162 int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
165 * Features could be changed after the feature negotiation.
166 * For example, VHOST_F_LOG_ALL will be set/cleared at the
167 * start/end of live migration, respectively. This callback
168 * is used to inform the application on such change.
170 int (*features_changed)(int vid, uint64_t features);
172 int (*new_connection)(int vid);
173 void (*destroy_connection)(int vid);
175 void *reserved[2]; /**< Reserved for future extension */
179 * Convert guest physical address to host virtual address
181 * This function is deprecated because it is unsafe.
182 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
183 * guest physical ranges are fully and contiguously mapped into
184 * process virtual address space.
187 * the guest memory regions
189 * the guest physical address for querying
191 * the host virtual address on success, 0 on failure
194 static __rte_always_inline uint64_t
195 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
197 struct rte_vhost_mem_region *reg;
200 for (i = 0; i < mem->nregions; i++) {
201 reg = &mem->regions[i];
202 if (gpa >= reg->guest_phys_addr &&
203 gpa < reg->guest_phys_addr + reg->size) {
204 return gpa - reg->guest_phys_addr +
213 * Convert guest physical address to host virtual address safely
215 * This variant of rte_vhost_gpa_to_vva() ensures that all of the
216 * requested length is mapped and contiguous in process address
220 * the guest memory regions
222 * the guest physical address for querying
224 * the size of the requested area to map, updated with actual size mapped
226 * the host virtual address on success, 0 on failure
228 static __rte_always_inline uint64_t
229 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
230 uint64_t gpa, uint64_t *len)
232 struct rte_vhost_mem_region *r;
235 for (i = 0; i < mem->nregions; i++) {
236 r = &mem->regions[i];
237 if (gpa >= r->guest_phys_addr &&
238 gpa < r->guest_phys_addr + r->size) {
240 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
241 *len = r->guest_phys_addr + r->size - gpa;
243 return gpa - r->guest_phys_addr +
252 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
255 * Log the memory write start with given address.
257 * This function only needs to be invoked when live migration starts.
258 * Therefore, it does not need to be called most of the time. To
259 * keep the performance impact minimal, it is suggested to do a
260 * check before calling it:
262 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
263 * rte_vhost_log_write(vid, addr, len);
268 * the starting address for write
270 * the length to write
272 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
275 * Log the used ring update start at given offset.
277 * Same as rte_vhost_log_write, it's suggested to do a check before
280 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
281 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
288 * the offset inside the used ring
290 * the length to write
292 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
293 uint64_t offset, uint64_t len);
295 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
298 * Register vhost driver. path could be different for multiple
301 int rte_vhost_driver_register(const char *path, uint64_t flags);
303 /* Unregister vhost driver. This is only meaningful to vhost user. */
304 int rte_vhost_driver_unregister(const char *path);
307 * Set the vdpa device id, enforce single connection per socket
310 * The vhost-user socket file path
314 * 0 on success, -1 on failure
316 int __rte_experimental
317 rte_vhost_driver_attach_vdpa_device(const char *path, int did);
320 * Unset the vdpa device id
323 * The vhost-user socket file path
325 * 0 on success, -1 on failure
327 int __rte_experimental
328 rte_vhost_driver_detach_vdpa_device(const char *path);
334 * The vhost-user socket file path
336 * Device id, -1 on failure
338 int __rte_experimental
339 rte_vhost_driver_get_vdpa_device_id(const char *path);
342 * Set the feature bits the vhost-user driver supports.
345 * The vhost-user socket file path
349 * 0 on success, -1 on failure
351 int rte_vhost_driver_set_features(const char *path, uint64_t features);
354 * Enable vhost-user driver features.
357 * - the param features should be a subset of the feature bits provided
358 * by rte_vhost_driver_set_features().
359 * - it must be invoked before vhost-user negotiation starts.
362 * The vhost-user socket file path
366 * 0 on success, -1 on failure
368 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
371 * Disable vhost-user driver features.
373 * The two notes at rte_vhost_driver_enable_features() also apply here.
376 * The vhost-user socket file path
378 * Features to disable
380 * 0 on success, -1 on failure
382 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
385 * Get the feature bits before feature negotiation.
388 * The vhost-user socket file path
390 * A pointer to store the queried feature bits
392 * 0 on success, -1 on failure
394 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
397 * Set the protocol feature bits before feature negotiation.
400 * The vhost-user socket file path
401 * @param protocol_features
402 * Supported protocol features
404 * 0 on success, -1 on failure
406 int __rte_experimental
407 rte_vhost_driver_set_protocol_features(const char *path,
408 uint64_t protocol_features);
411 * Get the protocol feature bits before feature negotiation.
414 * The vhost-user socket file path
415 * @param protocol_features
416 * A pointer to store the queried protocol feature bits
418 * 0 on success, -1 on failure
420 int __rte_experimental
421 rte_vhost_driver_get_protocol_features(const char *path,
422 uint64_t *protocol_features);
425 * Get the queue number bits before feature negotiation.
428 * The vhost-user socket file path
430 * A pointer to store the queried queue number bits
432 * 0 on success, -1 on failure
434 int __rte_experimental
435 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
438 * Get the feature bits after negotiation
443 * A pointer to store the queried feature bits
445 * 0 on success, -1 on failure
447 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
449 /* Register callbacks. */
450 int rte_vhost_driver_callback_register(const char *path,
451 struct vhost_device_ops const * const ops);
455 * Start the vhost-user driver.
457 * This function triggers the vhost-user negotiation.
460 * The vhost-user socket file path
462 * 0 on success, -1 on failure
464 int rte_vhost_driver_start(const char *path);
467 * Get the MTU value of the device if set in QEMU.
470 * virtio-net device ID
472 * The variable to store the MTU value
476 * -EAGAIN: device not yet started
477 * -ENOTSUP: device does not support MTU feature
479 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
482 * Get the numa node from which the virtio net device's memory
489 * The numa node, -1 on failure
491 int rte_vhost_get_numa_node(int vid);
495 * Get the number of queues the device supports.
497 * Note this function is deprecated, as it returns a queue pair number,
498 * which is vhost specific. Instead, rte_vhost_get_vring_num should
505 * The number of queues, 0 on failure
508 uint32_t rte_vhost_get_queue_num(int vid);
511 * Get the number of vrings the device supports.
517 * The number of vrings, 0 on failure
519 uint16_t rte_vhost_get_vring_num(int vid);
522 * Get the virtio net device's ifname, which is the vhost-user socket
528 * The buffer to store the queried ifname
533 * 0 on success, -1 on failure
535 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
538 * Get how many avail entries are left in the queue
546 * num of avail entries left
548 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
553 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
554 * be received from the physical port or from another virtual device. A packet
555 * count is returned to indicate the number of packets that were successfully
556 * added to the RX queue.
560 * virtio queue index in mq case
562 * array to contain packets to be enqueued
564 * packets num to be enqueued
566 * num of packets enqueued
568 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
569 struct rte_mbuf **pkts, uint16_t count);
572 * This function gets guest buffers from the virtio device TX virtqueue,
573 * constructs host mbufs, copies guest buffer content to host mbufs and
574 * stores them in pkts to be processed.
578 * virtio queue index in mq case
580 * mbuf_pool where host mbuf is allocated.
582 * array to contain packets to be dequeued
584 * packets num to be dequeued
586 * num of packets dequeued
588 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
589 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
592 * Get guest mem table: a list of memory regions.
594 * An rte_vhost_memory object will be allocated internally, to hold the
595 * guest memory regions. Application should free it at destroy_device()
601 * To store the returned mem regions
603 * 0 on success, -1 on failure
605 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
608 * Get guest vring info, including the vring address, vring size, etc.
615 * the structure to hold the requested vring info
617 * 0 on success, -1 on failure
619 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
620 struct rte_vhost_vring *vring);
623 * Notify the guest that used descriptors have been added to the vring. This
624 * function acts as a memory barrier.
631 * 0 on success, -1 on failure
633 int rte_vhost_vring_call(int vid, uint16_t vring_idx);
636 * Get vhost RX queue avail count.
641 * virtio queue index in mq case
643 * num of desc available
645 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
648 * Get log base and log size of the vhost device
657 * 0 on success, -1 on failure
659 int __rte_experimental
660 rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
663 * Get last_avail/used_idx of the vhost virtqueue
669 * @param last_avail_idx
670 * vhost last_avail_idx to get
671 * @param last_used_idx
672 * vhost last_used_idx to get
674 * 0 on success, -1 on failure
676 int __rte_experimental
677 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
678 uint16_t *last_avail_idx, uint16_t *last_used_idx);
681 * Set last_avail/used_idx of the vhost virtqueue
687 * @param last_avail_idx
688 * last_avail_idx to set
689 * @param last_used_idx
690 * last_used_idx to set
692 * 0 on success, -1 on failure
694 int __rte_experimental
695 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
696 uint16_t last_avail_idx, uint16_t last_used_idx);
699 * Register external message handling callbacks
704 * virtio external callbacks to register
706 * additional context passed to the callbacks
708 * 0 on success, -1 on failure
710 int __rte_experimental
711 rte_vhost_extern_callback_register(int vid,
712 struct rte_vhost_user_extern_ops const * const ops, void *ctx);
715 * Get vdpa device id for vhost device.
722 int __rte_experimental
723 rte_vhost_get_vdpa_device_id(int vid);
729 #endif /* _RTE_VHOST_H_ */