1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 * Interface to vhost-user
14 #include <sys/eventfd.h>
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
27 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
28 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
29 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
30 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
32 /** Protocol features. */
33 #ifndef VHOST_USER_PROTOCOL_F_MQ
34 #define VHOST_USER_PROTOCOL_F_MQ 0
37 #ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
38 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
41 #ifndef VHOST_USER_PROTOCOL_F_RARP
42 #define VHOST_USER_PROTOCOL_F_RARP 2
45 #ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
46 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
49 #ifndef VHOST_USER_PROTOCOL_F_NET_MTU
50 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
53 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
54 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
57 #ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
58 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
61 #ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
62 #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
65 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
66 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
69 #ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
70 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
73 /** Indicate whether protocol features negotiation is supported. */
74 #ifndef VHOST_USER_F_PROTOCOL_FEATURES
75 #define VHOST_USER_F_PROTOCOL_FEATURES 30
79 * Information relating to memory regions including offsets to
80 * addresses in QEMU's memory file.
82 struct rte_vhost_mem_region {
83 uint64_t guest_phys_addr;
84 uint64_t guest_user_addr;
85 uint64_t host_user_addr;
93 * Memory structure includes region and mapping information.
95 struct rte_vhost_memory {
97 struct rte_vhost_mem_region regions[];
100 struct rte_vhost_vring {
101 struct vring_desc *desc;
102 struct vring_avail *avail;
103 struct vring_used *used;
104 uint64_t log_guest_addr;
106 /** Deprecated, use rte_vhost_vring_call() instead. */
114 * Device and vring operations.
116 struct vhost_device_ops {
117 int (*new_device)(int vid); /**< Add device. */
118 void (*destroy_device)(int vid); /**< Remove device. */
120 int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
123 * Features could be changed after the feature negotiation.
124 * For example, VHOST_F_LOG_ALL will be set/cleared at the
125 * start/end of live migration, respectively. This callback
126 * is used to inform the application on such change.
128 int (*features_changed)(int vid, uint64_t features);
130 int (*new_connection)(int vid);
131 void (*destroy_connection)(int vid);
133 void *reserved[2]; /**< Reserved for future extension */
137 * Convert guest physical address to host virtual address
139 * This function is deprecated because it is unsafe.
140 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
141 * guest physical ranges are fully and contiguously mapped into
142 * process virtual address space.
145 * the guest memory regions
147 * the guest physical address for querying
149 * the host virtual address on success, 0 on failure
152 static __rte_always_inline uint64_t
153 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
155 struct rte_vhost_mem_region *reg;
158 for (i = 0; i < mem->nregions; i++) {
159 reg = &mem->regions[i];
160 if (gpa >= reg->guest_phys_addr &&
161 gpa < reg->guest_phys_addr + reg->size) {
162 return gpa - reg->guest_phys_addr +
171 * Convert guest physical address to host virtual address safely
173 * This variant of rte_vhost_gpa_to_vva() ensures that all of the
174 * requested length is mapped and contiguous in process address
178 * the guest memory regions
180 * the guest physical address for querying
182 * the size of the requested area to map, updated with actual size mapped
184 * the host virtual address on success, 0 on failure
186 static __rte_always_inline uint64_t
187 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
188 uint64_t gpa, uint64_t *len)
190 struct rte_vhost_mem_region *r;
193 for (i = 0; i < mem->nregions; i++) {
194 r = &mem->regions[i];
195 if (gpa >= r->guest_phys_addr &&
196 gpa < r->guest_phys_addr + r->size) {
198 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
199 *len = r->guest_phys_addr + r->size - gpa;
201 return gpa - r->guest_phys_addr +
210 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
213 * Log the memory write starting at the given address.
215 * This function only needs to be invoked when live migration starts.
216 * Therefore, it does not need to be called most of the time. To
217 * keep the performance impact minimal, it is suggested to do a
218 * check before calling it:
220 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
221 * rte_vhost_log_write(vid, addr, len);
226 * the starting address for write
228 * the length to write
230 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
233 * Log the used ring update starting at the given offset.
235 * Same as rte_vhost_log_write, it's suggested to do a check before
238 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
239 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
246 * the offset inside the used ring
248 * the length to write
250 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
251 uint64_t offset, uint64_t len);
253 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
256 * Register vhost driver. path could be different for multiple
259 int rte_vhost_driver_register(const char *path, uint64_t flags);
261 /* Unregister vhost driver. This is only meaningful to vhost user. */
262 int rte_vhost_driver_unregister(const char *path);
265 * Set the vdpa device id, enforce single connection per socket
268 * The vhost-user socket file path
272 * 0 on success, -1 on failure
274 int __rte_experimental
275 rte_vhost_driver_attach_vdpa_device(const char *path, int did);
278 * Unset the vdpa device id
281 * The vhost-user socket file path
283 * 0 on success, -1 on failure
285 int __rte_experimental
286 rte_vhost_driver_detach_vdpa_device(const char *path);
292 * The vhost-user socket file path
294 * Device id, -1 on failure
296 int __rte_experimental
297 rte_vhost_driver_get_vdpa_device_id(const char *path);
300 * Set the feature bits the vhost-user driver supports.
303 * The vhost-user socket file path
307 * 0 on success, -1 on failure
309 int rte_vhost_driver_set_features(const char *path, uint64_t features);
312 * Enable vhost-user driver features.
315 * - the param features should be a subset of the feature bits provided
316 * by rte_vhost_driver_set_features().
317 * - it must be invoked before vhost-user negotiation starts.
320 * The vhost-user socket file path
324 * 0 on success, -1 on failure
326 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
329 * Disable vhost-user driver features.
331 * The two notes at rte_vhost_driver_enable_features() also apply here.
334 * The vhost-user socket file path
336 * Features to disable
338 * 0 on success, -1 on failure
340 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
343 * Get the feature bits before feature negotiation.
346 * The vhost-user socket file path
348 * A pointer to store the queried feature bits
350 * 0 on success, -1 on failure
352 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
355 * Get the protocol feature bits before feature negotiation.
358 * The vhost-user socket file path
359 * @param protocol_features
360 * A pointer to store the queried protocol feature bits
362 * 0 on success, -1 on failure
364 int __rte_experimental
365 rte_vhost_driver_get_protocol_features(const char *path,
366 uint64_t *protocol_features);
369 * Get the queue number bits before feature negotiation.
372 * The vhost-user socket file path
374 * A pointer to store the queried queue number bits
376 * 0 on success, -1 on failure
378 int __rte_experimental
379 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
382 * Get the feature bits after negotiation
387 * A pointer to store the queried feature bits
389 * 0 on success, -1 on failure
391 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
393 /* Register callbacks. */
394 int rte_vhost_driver_callback_register(const char *path,
395 struct vhost_device_ops const * const ops);
399 * Start the vhost-user driver.
401 * This function triggers the vhost-user negotiation.
404 * The vhost-user socket file path
406 * 0 on success, -1 on failure
408 int rte_vhost_driver_start(const char *path);
411 * Get the MTU value of the device if set in QEMU.
414 * virtio-net device ID
416 * The variable to store the MTU value
420 * -EAGAIN: device not yet started
421 * -ENOTSUP: device does not support MTU feature
423 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
426 * Get the numa node from which the virtio net device's memory
433 * The numa node, -1 on failure
435 int rte_vhost_get_numa_node(int vid);
439 * Get the number of queues the device supports.
441 * Note this function is deprecated, as it returns a queue pair number,
442 * which is vhost specific. Instead, rte_vhost_get_vring_num should
449 * The number of queues, 0 on failure
452 uint32_t rte_vhost_get_queue_num(int vid);
455 * Get the number of vrings the device supports.
461 * The number of vrings, 0 on failure
463 uint16_t rte_vhost_get_vring_num(int vid);
466 * Get the virtio net device's ifname, which is the vhost-user socket
472 * The buffer to store the queried ifname
477 * 0 on success, -1 on failure
479 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
482 * Get how many avail entries are left in the queue
490 * num of avail entries left
492 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
497 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
498 * be received from the physical port or from another virtual device. A packet
499 * count is returned to indicate the number of packets that were successfully
500 * added to the RX queue.
504 * virtio queue index in mq case
506 * array to contain packets to be enqueued
508 * packets num to be enqueued
510 * num of packets enqueued
512 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
513 struct rte_mbuf **pkts, uint16_t count);
516 * This function gets guest buffers from the virtio device TX virtqueue,
517 * constructs host mbufs, copies guest buffer content to host mbufs and
518 * stores them in pkts to be processed.
522 * virtio queue index in mq case
524 * mbuf_pool where host mbuf is allocated.
526 * array to contain packets to be dequeued
528 * packets num to be dequeued
530 * num of packets dequeued
532 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
533 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
536 * Get guest mem table: a list of memory regions.
538 * An rte_vhost_memory object will be allocated internally, to hold the
539 * guest memory regions. The application should free it at destroy_device()
545 * To store the returned mem regions
547 * 0 on success, -1 on failure
549 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
552 * Get guest vring info, including the vring address, vring size, etc.
559 * the structure to hold the requested vring info
561 * 0 on success, -1 on failure
563 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
564 struct rte_vhost_vring *vring);
567 * Notify the guest that used descriptors have been added to the vring. This
568 * function acts as a memory barrier.
575 * 0 on success, -1 on failure
577 int rte_vhost_vring_call(int vid, uint16_t vring_idx);
580 * Get vhost RX queue avail count.
585 * virtio queue index in mq case
587 * num of desc available
589 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
592 * Get log base and log size of the vhost device
601 * 0 on success, -1 on failure
603 int __rte_experimental
604 rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
607 * Get last_avail/used_idx of the vhost virtqueue
613 * @param last_avail_idx
614 * vhost last_avail_idx to get
615 * @param last_used_idx
616 * vhost last_used_idx to get
618 * 0 on success, -1 on failure
620 int __rte_experimental
621 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
622 uint16_t *last_avail_idx, uint16_t *last_used_idx);
625 * Set last_avail/used_idx of the vhost virtqueue
631 * @param last_avail_idx
632 * last_avail_idx to set
633 * @param last_used_idx
634 * last_used_idx to set
636 * 0 on success, -1 on failure
638 int __rte_experimental
639 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
640 uint16_t last_avail_idx, uint16_t last_used_idx);
643 * Get vdpa device id for vhost device.
650 int __rte_experimental
651 rte_vhost_get_vdpa_device_id(int vid);
657 #endif /* _RTE_VHOST_H_ */