[dpdk.git] / lib / vhost / vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_CONFIG(ERR,
62                                 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63                                 iova);
64                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65                 }
66
67                 vhost_user_iotlb_rd_lock(vq);
68         }
69
70         return 0;
71 }
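
/*
 * Note on the expected caller pattern (a sketch, not code from this file):
 * data-path code takes the IOTLB read lock once for a full burst, translates
 * as many addresses as needed, and simply retries later when 0 is returned,
 * since the miss request sent above eventually populates the cache:
 *
 *	vhost_user_iotlb_rd_lock(vq);
 *	vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);
 *	if (!vva)
 *		goto out_unlock;
 *	...
 *	vhost_user_iotlb_rd_unlock(vq);
 */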
72
73 #define VHOST_LOG_PAGE  4096
74
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82         /*
83          * __sync_ built-ins are deprecated, but __atomic_ ones
84          * are poorly optimized in older GCC versions.
85          */
86         __sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95         vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
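
/*
 * Worked example of the dirty log layout used above: the log is a bitmap
 * with one bit per VHOST_LOG_PAGE (4 KiB) page of guest memory. For a
 * write touching guest physical address 0x12345:
 *
 *	page = 0x12345 / 4096 = 18	(0x12)
 *	byte = 18 / 8         = 2	(log_base[2])
 *	bit  = 18 % 8         = 2
 *
 * so bit 2 of log_base[2] is set atomically.
 */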
97
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101         uint64_t page;
102
103         if (unlikely(!dev->log_base || !len))
104                 return;
105
106         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107                 return;
108
109         /* To make sure guest memory updates are committed before logging */
110         rte_atomic_thread_fence(__ATOMIC_RELEASE);
111
112         page = addr / VHOST_LOG_PAGE;
113         while (page * VHOST_LOG_PAGE < addr + len) {
114                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115                 page += 1;
116         }
117 }
118
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121                              uint64_t iova, uint64_t len)
122 {
123         uint64_t hva, gpa, map_len;
124         map_len = len;
125
126         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127         if (map_len != len) {
128                 VHOST_LOG_DATA(ERR,
129                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130                         iova);
131                 return;
132         }
133
134         gpa = hva_to_gpa(dev, hva, len);
135         if (gpa)
136                 __vhost_log_write(dev, gpa, len);
137 }
138
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142         unsigned long *log_base;
143         int i;
144
145         if (unlikely(!dev->log_base))
146                 return;
147
148         /* No cache, nothing to sync */
149         if (unlikely(!vq->log_cache))
150                 return;
151
152         rte_atomic_thread_fence(__ATOMIC_RELEASE);
153
154         log_base = (unsigned long *)(uintptr_t)dev->log_base;
155
156         for (i = 0; i < vq->log_cache_nb_elem; i++) {
157                 struct log_cache_entry *elem = vq->log_cache + i;
158
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160                 /*
161                  * '__sync' builtins are deprecated, but '__atomic' ones
162                  * are poorly optimized in older GCC versions.
163                  */
164                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166                 __atomic_fetch_or(log_base + elem->offset, elem->val,
167                                 __ATOMIC_RELAXED);
168 #endif
169         }
170
171         rte_atomic_thread_fence(__ATOMIC_RELEASE);
172
173         vq->log_cache_nb_elem = 0;
174 }
175
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178                         uint64_t page)
179 {
180         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181         uint32_t offset = page / (sizeof(unsigned long) << 3);
182         int i;
183
184         if (unlikely(!vq->log_cache)) {
185                 /* No logging cache allocated, write dirty log map directly */
186                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
187                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188
189                 return;
190         }
191
192         for (i = 0; i < vq->log_cache_nb_elem; i++) {
193                 struct log_cache_entry *elem = vq->log_cache + i;
194
195                 if (elem->offset == offset) {
196                         elem->val |= (1UL << bit_nr);
197                         return;
198                 }
199         }
200
201         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202                 /*
203                  * No more room for a new log cache entry,
204                  * so write the dirty log map directly.
205                  */
206                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
207                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208
209                 return;
210         }
211
212         vq->log_cache[i].offset = offset;
213         vq->log_cache[i].val = (1UL << bit_nr);
214         vq->log_cache_nb_elem++;
215 }
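
/*
 * The cache above batches dirty bits per virtqueue: each entry covers one
 * unsigned long worth of the bitmap ("offset") and accumulates the bits set
 * for it ("val"). Entries are only OR-ed into the shared log when
 * __vhost_log_cache_sync() runs, which avoids an atomic read-modify-write
 * on the shared bitmap for every logged page. If no cache is allocated, or
 * the cache is full, the page is written to the shared log directly.
 */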
216
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219                         uint64_t addr, uint64_t len)
220 {
221         uint64_t page;
222
223         if (unlikely(!dev->log_base || !len))
224                 return;
225
226         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227                 return;
228
229         page = addr / VHOST_LOG_PAGE;
230         while (page * VHOST_LOG_PAGE < addr + len) {
231                 vhost_log_cache_page(dev, vq, page);
232                 page += 1;
233         }
234 }
235
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238                              uint64_t iova, uint64_t len)
239 {
240         uint64_t hva, gpa, map_len;
241         map_len = len;
242
243         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244         if (map_len != len) {
245                 VHOST_LOG_DATA(ERR,
246                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247                         iova);
248                 return;
249         }
250
251         gpa = hva_to_gpa(dev, hva, len);
252         if (gpa)
253                 __vhost_log_cache_write(dev, vq, gpa, len);
254 }
255
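/*
 * Allocate a local copy of a guest indirect descriptor table. The copy is
 * done chunk by chunk because the table may span several IOTLB entries.
 * Returns a buffer allocated with rte_malloc_socket() that the caller must
 * release with rte_free(), or NULL on translation or allocation failure.
 */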
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258                 uint64_t desc_addr, uint64_t desc_len)
259 {
260         void *idesc;
261         uint64_t src, dst;
262         uint64_t len, remain = desc_len;
263
264         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
265         if (unlikely(!idesc))
266                 return NULL;
267
268         dst = (uint64_t)(uintptr_t)idesc;
269
270         while (remain) {
271                 len = remain;
272                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273                                 VHOST_ACCESS_RO);
274                 if (unlikely(!src || !len)) {
275                         rte_free(idesc);
276                         return NULL;
277                 }
278
279                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280
281                 remain -= len;
282                 dst += len;
283                 desc_addr += len;
284         }
285
286         return idesc;
287 }
288
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292         if ((vq->callfd >= 0) && (destroy != 0))
293                 close(vq->callfd);
294         if (vq->kickfd >= 0)
295                 close(vq->kickfd);
296 }
297
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301         if (!(dev->protocol_features &
302             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303                 return;
304
305         if (vq_is_packed(dev)) {
306                 if (vq->inflight_packed)
307                         vq->inflight_packed = NULL;
308         } else {
309                 if (vq->inflight_split)
310                         vq->inflight_split = NULL;
311         }
312
313         if (vq->resubmit_inflight) {
314                 if (vq->resubmit_inflight->resubmit_list) {
315                         rte_free(vq->resubmit_inflight->resubmit_list);
316                         vq->resubmit_inflight->resubmit_list = NULL;
317                 }
318                 rte_free(vq->resubmit_inflight);
319                 vq->resubmit_inflight = NULL;
320         }
321 }
322
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330         uint32_t i;
331
332         vhost_backend_cleanup(dev);
333
334         for (i = 0; i < dev->nr_vring; i++) {
335                 cleanup_vq(dev->virtqueue[i], destroy);
336                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
337         }
338 }
339
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343         if (!vq->async)
344                 return;
345
346         rte_free(vq->async->pkts_info);
347
348         rte_free(vq->async->buffers_packed);
349         vq->async->buffers_packed = NULL;
350         rte_free(vq->async->descs_split);
351         vq->async->descs_split = NULL;
352
353         rte_free(vq->async);
354         vq->async = NULL;
355 }
356
357 void
358 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
359 {
360         if (vq_is_packed(dev))
361                 rte_free(vq->shadow_used_packed);
362         else
363                 rte_free(vq->shadow_used_split);
364
365         vhost_free_async_mem(vq);
366         rte_free(vq->batch_copy_elems);
367         rte_mempool_free(vq->iotlb_pool);
368         rte_free(vq->log_cache);
369         rte_free(vq);
370 }
371
372 /*
373  * Release virtqueues and device memory.
374  */
375 static void
376 free_device(struct virtio_net *dev)
377 {
378         uint32_t i;
379
380         for (i = 0; i < dev->nr_vring; i++)
381                 free_vq(dev, dev->virtqueue[i]);
382
383         rte_free(dev);
384 }
385
386 static __rte_always_inline int
387 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
388 {
389         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
390                 return 0;
391
392         vq->log_guest_addr = translate_log_addr(dev, vq,
393                                                 vq->ring_addrs.log_guest_addr);
394         if (vq->log_guest_addr == 0)
395                 return -1;
396
397         return 0;
398 }
399
400 /*
401  * Converts a vring log address to a GPA.
402  * If an IOMMU is enabled, the log address is an IOVA.
403  * If no IOMMU is enabled, the log address is already a GPA.
404  *
405  * Caller should have iotlb_lock read-locked
406  */
407 uint64_t
408 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
409                 uint64_t log_addr)
410 {
411         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
412                 const uint64_t exp_size = sizeof(uint64_t);
413                 uint64_t hva, gpa;
414                 uint64_t size = exp_size;
415
416                 hva = vhost_iova_to_vva(dev, vq, log_addr,
417                                         &size, VHOST_ACCESS_RW);
418
419                 if (size != exp_size)
420                         return 0;
421
422                 gpa = hva_to_gpa(dev, hva, exp_size);
423                 if (!gpa) {
424                         VHOST_LOG_CONFIG(ERR,
425                                 "VQ: Failed to find GPA for log_addr: 0x%"
426                                 PRIx64 " hva: 0x%" PRIx64 "\n",
427                                 log_addr, hva);
428                         return 0;
429                 }
430                 return gpa;
431
432         } else
433                 return log_addr;
434 }
435
436 /* Caller should have iotlb_lock read-locked */
437 static int
438 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
439 {
440         uint64_t req_size, size;
441
442         req_size = sizeof(struct vring_desc) * vq->size;
443         size = req_size;
444         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
445                                                 vq->ring_addrs.desc_user_addr,
446                                                 &size, VHOST_ACCESS_RW);
447         if (!vq->desc || size != req_size)
448                 return -1;
449
450         req_size = sizeof(struct vring_avail);
451         req_size += sizeof(uint16_t) * vq->size;
452         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
453                 req_size += sizeof(uint16_t);
454         size = req_size;
455         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
456                                                 vq->ring_addrs.avail_user_addr,
457                                                 &size, VHOST_ACCESS_RW);
458         if (!vq->avail || size != req_size)
459                 return -1;
460
461         req_size = sizeof(struct vring_used);
462         req_size += sizeof(struct vring_used_elem) * vq->size;
463         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
464                 req_size += sizeof(uint16_t);
465         size = req_size;
466         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
467                                                 vq->ring_addrs.used_user_addr,
468                                                 &size, VHOST_ACCESS_RW);
469         if (!vq->used || size != req_size)
470                 return -1;
471
472         return 0;
473 }
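
/*
 * Size arithmetic used above, worked for a 256-entry split ring with
 * VIRTIO_RING_F_EVENT_IDX negotiated (sizes as defined by the virtio spec):
 *
 *	desc  = 16 B * 256             = 4096 B
 *	avail =  4 B + 2 B * 256 + 2 B =  518 B
 *	used  =  4 B + 8 B * 256 + 2 B = 2054 B
 *
 * Each area must be covered by a single contiguous IOVA-to-VVA translation,
 * otherwise the ring is treated as not yet accessible.
 */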
474
475 /* Caller should have iotlb_lock read-locked */
476 static int
477 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
478 {
479         uint64_t req_size, size;
480
481         req_size = sizeof(struct vring_packed_desc) * vq->size;
482         size = req_size;
483         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
484                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
485                                 &size, VHOST_ACCESS_RW);
486         if (!vq->desc_packed || size != req_size)
487                 return -1;
488
489         req_size = sizeof(struct vring_packed_desc_event);
490         size = req_size;
491         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
492                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
493                                 &size, VHOST_ACCESS_RW);
494         if (!vq->driver_event || size != req_size)
495                 return -1;
496
497         req_size = sizeof(struct vring_packed_desc_event);
498         size = req_size;
499         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
500                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
501                                 &size, VHOST_ACCESS_RW);
502         if (!vq->device_event || size != req_size)
503                 return -1;
504
505         return 0;
506 }
507
508 int
509 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
510 {
511
512         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
513                 return -1;
514
515         if (vq_is_packed(dev)) {
516                 if (vring_translate_packed(dev, vq) < 0)
517                         return -1;
518         } else {
519                 if (vring_translate_split(dev, vq) < 0)
520                         return -1;
521         }
522
523         if (log_translate(dev, vq) < 0)
524                 return -1;
525
526         vq->access_ok = true;
527
528         return 0;
529 }
530
531 void
532 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
533 {
534         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
535                 vhost_user_iotlb_wr_lock(vq);
536
537         vq->access_ok = false;
538         vq->desc = NULL;
539         vq->avail = NULL;
540         vq->used = NULL;
541         vq->log_guest_addr = 0;
542
543         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
544                 vhost_user_iotlb_wr_unlock(vq);
545 }
546
547 static void
548 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
549 {
550         struct vhost_virtqueue *vq;
551         int numa_node = SOCKET_ID_ANY;
552
553         if (vring_idx >= VHOST_MAX_VRING) {
554                 VHOST_LOG_CONFIG(ERR,
555                                 "Failed to init vring, index out of bounds (%d)\n",
556                                 vring_idx);
557                 return;
558         }
559
560         vq = dev->virtqueue[vring_idx];
561         if (!vq) {
562                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
563                                 vring_idx);
564                 return;
565         }
566
567         memset(vq, 0, sizeof(struct vhost_virtqueue));
568
569         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
570         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
571         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
572
573 #ifdef RTE_LIBRTE_VHOST_NUMA
574         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
575                 VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
576                         dev->vid, rte_strerror(errno));
577                 numa_node = SOCKET_ID_ANY;
578         }
579 #endif
580         vq->numa_node = numa_node;
581
582         vhost_user_iotlb_init(dev, vring_idx);
583 }
584
585 static void
586 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
587 {
588         struct vhost_virtqueue *vq;
589         int callfd;
590
591         if (vring_idx >= VHOST_MAX_VRING) {
592                 VHOST_LOG_CONFIG(ERR,
593                                 "Failed to reset vring, index out of bounds (%d)\n",
594                                 vring_idx);
595                 return;
596         }
597
598         vq = dev->virtqueue[vring_idx];
599         if (!vq) {
600                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
601                                 vring_idx);
602                 return;
603         }
604
605         callfd = vq->callfd;
606         init_vring_queue(dev, vring_idx);
607         vq->callfd = callfd;
608 }
609
610 int
611 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
612 {
613         struct vhost_virtqueue *vq;
614         uint32_t i;
615
616         /* Also allocate holes, if any, up to requested vring index. */
617         for (i = 0; i <= vring_idx; i++) {
618                 if (dev->virtqueue[i])
619                         continue;
620
621                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
622                 if (vq == NULL) {
623                         VHOST_LOG_CONFIG(ERR,
624                                 "Failed to allocate memory for vring:%u.\n", i);
625                         return -1;
626                 }
627
628                 dev->virtqueue[i] = vq;
629                 init_vring_queue(dev, i);
630                 rte_spinlock_init(&vq->access_lock);
631                 vq->avail_wrap_counter = 1;
632                 vq->used_wrap_counter = 1;
633                 vq->signalled_used_valid = false;
634         }
635
636         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
637
638         return 0;
639 }
640
641 /*
642  * Reset some variables in the device structure, while keeping a few
643  * others untouched, such as vid, ifname and nr_vring: they
644  * should remain the same unless the device is removed.
645  */
646 void
647 reset_device(struct virtio_net *dev)
648 {
649         uint32_t i;
650
651         dev->features = 0;
652         dev->protocol_features = 0;
653         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
654
655         for (i = 0; i < dev->nr_vring; i++)
656                 reset_vring_queue(dev, i);
657 }
658
659 /*
660  * Invoked when a new vhost-user connection is established (when
661  * a new virtio device is being attached).
662  */
663 int
664 vhost_new_device(void)
665 {
666         struct virtio_net *dev;
667         int i;
668
669         pthread_mutex_lock(&vhost_dev_lock);
670         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
671                 if (vhost_devices[i] == NULL)
672                         break;
673         }
674
675         if (i == MAX_VHOST_DEVICE) {
676                 VHOST_LOG_CONFIG(ERR,
677                         "Failed to find a free slot for new device.\n");
678                 pthread_mutex_unlock(&vhost_dev_lock);
679                 return -1;
680         }
681
682         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
683         if (dev == NULL) {
684                 VHOST_LOG_CONFIG(ERR,
685                         "Failed to allocate memory for new dev.\n");
686                 pthread_mutex_unlock(&vhost_dev_lock);
687                 return -1;
688         }
689
690         vhost_devices[i] = dev;
691         pthread_mutex_unlock(&vhost_dev_lock);
692
693         dev->vid = i;
694         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
695         dev->slave_req_fd = -1;
696         dev->postcopy_ufd = -1;
697         rte_spinlock_init(&dev->slave_req_lock);
698
699         return i;
700 }
701
702 void
703 vhost_destroy_device_notify(struct virtio_net *dev)
704 {
705         struct rte_vdpa_device *vdpa_dev;
706
707         if (dev->flags & VIRTIO_DEV_RUNNING) {
708                 vdpa_dev = dev->vdpa_dev;
709                 if (vdpa_dev)
710                         vdpa_dev->ops->dev_close(dev->vid);
711                 dev->flags &= ~VIRTIO_DEV_RUNNING;
712                 dev->notify_ops->destroy_device(dev->vid);
713         }
714 }
715
716 /*
717  * Invoked when the vhost-user connection is broken (when
718  * the virtio device is being detached).
719  */
720 void
721 vhost_destroy_device(int vid)
722 {
723         struct virtio_net *dev = get_device(vid);
724
725         if (dev == NULL)
726                 return;
727
728         vhost_destroy_device_notify(dev);
729
730         cleanup_device(dev, 1);
731         free_device(dev);
732
733         vhost_devices[vid] = NULL;
734 }
735
736 void
737 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
738 {
739         struct virtio_net *dev = get_device(vid);
740
741         if (dev == NULL)
742                 return;
743
744         dev->vdpa_dev = vdpa_dev;
745 }
746
747 void
748 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
749 {
750         struct virtio_net *dev;
751         unsigned int len;
752
753         dev = get_device(vid);
754         if (dev == NULL)
755                 return;
756
757         len = if_len > sizeof(dev->ifname) ?
758                 sizeof(dev->ifname) : if_len;
759
760         strncpy(dev->ifname, if_name, len);
761         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
762 }
763
764 void
765 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
766 {
767         struct virtio_net *dev = get_device(vid);
768
769         if (dev == NULL)
770                 return;
771
772         if (enable)
773                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
774         else
775                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
776         if (!compliant_ol_flags)
777                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
778         else
779                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
780 }
781
782 void
783 vhost_enable_extbuf(int vid)
784 {
785         struct virtio_net *dev = get_device(vid);
786
787         if (dev == NULL)
788                 return;
789
790         dev->extbuf = 1;
791 }
792
793 void
794 vhost_enable_linearbuf(int vid)
795 {
796         struct virtio_net *dev = get_device(vid);
797
798         if (dev == NULL)
799                 return;
800
801         dev->linearbuf = 1;
802 }
803
804 int
805 rte_vhost_get_mtu(int vid, uint16_t *mtu)
806 {
807         struct virtio_net *dev = get_device(vid);
808
809         if (dev == NULL || mtu == NULL)
810                 return -ENODEV;
811
812         if (!(dev->flags & VIRTIO_DEV_READY))
813                 return -EAGAIN;
814
815         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
816                 return -ENOTSUP;
817
818         *mtu = dev->mtu;
819
820         return 0;
821 }
822
823 int
824 rte_vhost_get_numa_node(int vid)
825 {
826 #ifdef RTE_LIBRTE_VHOST_NUMA
827         struct virtio_net *dev = get_device(vid);
828         int numa_node;
829         int ret;
830
831         if (dev == NULL || numa_available() != 0)
832                 return -1;
833
834         ret = get_mempolicy(&numa_node, NULL, 0, dev,
835                             MPOL_F_NODE | MPOL_F_ADDR);
836         if (ret < 0) {
837                 VHOST_LOG_CONFIG(ERR,
838                         "(%d) failed to query numa node: %s\n",
839                         vid, rte_strerror(errno));
840                 return -1;
841         }
842
843         return numa_node;
844 #else
845         RTE_SET_USED(vid);
846         return -1;
847 #endif
848 }
849
850 uint32_t
851 rte_vhost_get_queue_num(int vid)
852 {
853         struct virtio_net *dev = get_device(vid);
854
855         if (dev == NULL)
856                 return 0;
857
858         return dev->nr_vring / 2;
859 }
860
861 uint16_t
862 rte_vhost_get_vring_num(int vid)
863 {
864         struct virtio_net *dev = get_device(vid);
865
866         if (dev == NULL)
867                 return 0;
868
869         return dev->nr_vring;
870 }
871
872 int
873 rte_vhost_get_ifname(int vid, char *buf, size_t len)
874 {
875         struct virtio_net *dev = get_device(vid);
876
877         if (dev == NULL || buf == NULL)
878                 return -1;
879
880         len = RTE_MIN(len, sizeof(dev->ifname));
881
882         strncpy(buf, dev->ifname, len);
883         buf[len - 1] = '\0';
884
885         return 0;
886 }
887
888 int
889 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
890 {
891         struct virtio_net *dev;
892
893         dev = get_device(vid);
894         if (dev == NULL || features == NULL)
895                 return -1;
896
897         *features = dev->features;
898         return 0;
899 }
900
901 int
902 rte_vhost_get_negotiated_protocol_features(int vid,
903                                            uint64_t *protocol_features)
904 {
905         struct virtio_net *dev;
906
907         dev = get_device(vid);
908         if (dev == NULL || protocol_features == NULL)
909                 return -1;
910
911         *protocol_features = dev->protocol_features;
912         return 0;
913 }
914
915 int
916 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
917 {
918         struct virtio_net *dev;
919         struct rte_vhost_memory *m;
920         size_t size;
921
922         dev = get_device(vid);
923         if (dev == NULL || mem == NULL)
924                 return -1;
925
926         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
927         m = malloc(sizeof(struct rte_vhost_memory) + size);
928         if (!m)
929                 return -1;
930
931         m->nregions = dev->mem->nregions;
932         memcpy(m->regions, dev->mem->regions, size);
933         *mem = m;
934
935         return 0;
936 }
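
/*
 * Usage note (a sketch, not code from this file): the table returned above
 * is allocated with malloc(), so the application is expected to release it
 * with free() once done:
 *
 *	struct rte_vhost_memory *mem;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		for (uint32_t i = 0; i < mem->nregions; i++)
 *			inspect_region(&mem->regions[i]);
 *		free(mem);
 *	}
 *
 * where inspect_region() stands in for whatever the application does with
 * each region.
 */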
937
938 int
939 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
940                           struct rte_vhost_vring *vring)
941 {
942         struct virtio_net *dev;
943         struct vhost_virtqueue *vq;
944
945         dev = get_device(vid);
946         if (dev == NULL || vring == NULL)
947                 return -1;
948
949         if (vring_idx >= VHOST_MAX_VRING)
950                 return -1;
951
952         vq = dev->virtqueue[vring_idx];
953         if (!vq)
954                 return -1;
955
956         if (vq_is_packed(dev)) {
957                 vring->desc_packed = vq->desc_packed;
958                 vring->driver_event = vq->driver_event;
959                 vring->device_event = vq->device_event;
960         } else {
961                 vring->desc = vq->desc;
962                 vring->avail = vq->avail;
963                 vring->used = vq->used;
964         }
965         vring->log_guest_addr  = vq->log_guest_addr;
966
967         vring->callfd  = vq->callfd;
968         vring->kickfd  = vq->kickfd;
969         vring->size    = vq->size;
970
971         return 0;
972 }
973
974 int
975 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
976                                   struct rte_vhost_ring_inflight *vring)
977 {
978         struct virtio_net *dev;
979         struct vhost_virtqueue *vq;
980
981         dev = get_device(vid);
982         if (unlikely(!dev))
983                 return -1;
984
985         if (vring_idx >= VHOST_MAX_VRING)
986                 return -1;
987
988         vq = dev->virtqueue[vring_idx];
989         if (unlikely(!vq))
990                 return -1;
991
992         if (vq_is_packed(dev)) {
993                 if (unlikely(!vq->inflight_packed))
994                         return -1;
995
996                 vring->inflight_packed = vq->inflight_packed;
997         } else {
998                 if (unlikely(!vq->inflight_split))
999                         return -1;
1000
1001                 vring->inflight_split = vq->inflight_split;
1002         }
1003
1004         vring->resubmit_inflight = vq->resubmit_inflight;
1005
1006         return 0;
1007 }
1008
1009 int
1010 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1011                                   uint16_t idx)
1012 {
1013         struct vhost_virtqueue *vq;
1014         struct virtio_net *dev;
1015
1016         dev = get_device(vid);
1017         if (unlikely(!dev))
1018                 return -1;
1019
1020         if (unlikely(!(dev->protocol_features &
1021             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1022                 return 0;
1023
1024         if (unlikely(vq_is_packed(dev)))
1025                 return -1;
1026
1027         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1028                 return -1;
1029
1030         vq = dev->virtqueue[vring_idx];
1031         if (unlikely(!vq))
1032                 return -1;
1033
1034         if (unlikely(!vq->inflight_split))
1035                 return -1;
1036
1037         if (unlikely(idx >= vq->size))
1038                 return -1;
1039
1040         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1041         vq->inflight_split->desc[idx].inflight = 1;
1042         return 0;
1043 }
1044
1045 int
1046 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1047                                    uint16_t head, uint16_t last,
1048                                    uint16_t *inflight_entry)
1049 {
1050         struct rte_vhost_inflight_info_packed *inflight_info;
1051         struct virtio_net *dev;
1052         struct vhost_virtqueue *vq;
1053         struct vring_packed_desc *desc;
1054         uint16_t old_free_head, free_head;
1055
1056         dev = get_device(vid);
1057         if (unlikely(!dev))
1058                 return -1;
1059
1060         if (unlikely(!(dev->protocol_features &
1061             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1062                 return 0;
1063
1064         if (unlikely(!vq_is_packed(dev)))
1065                 return -1;
1066
1067         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1068                 return -1;
1069
1070         vq = dev->virtqueue[vring_idx];
1071         if (unlikely(!vq))
1072                 return -1;
1073
1074         inflight_info = vq->inflight_packed;
1075         if (unlikely(!inflight_info))
1076                 return -1;
1077
1078         if (unlikely(head >= vq->size))
1079                 return -1;
1080
1081         desc = vq->desc_packed;
1082         old_free_head = inflight_info->old_free_head;
1083         if (unlikely(old_free_head >= vq->size))
1084                 return -1;
1085
1086         free_head = old_free_head;
1087
1088         /* init header descriptor */
1089         inflight_info->desc[old_free_head].num = 0;
1090         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1091         inflight_info->desc[old_free_head].inflight = 1;
1092
1093         /* save desc entry in flight entry */
1094         while (head != ((last + 1) % vq->size)) {
1095                 inflight_info->desc[old_free_head].num++;
1096                 inflight_info->desc[free_head].addr = desc[head].addr;
1097                 inflight_info->desc[free_head].len = desc[head].len;
1098                 inflight_info->desc[free_head].flags = desc[head].flags;
1099                 inflight_info->desc[free_head].id = desc[head].id;
1100
1101                 inflight_info->desc[old_free_head].last = free_head;
1102                 free_head = inflight_info->desc[free_head].next;
1103                 inflight_info->free_head = free_head;
1104                 head = (head + 1) % vq->size;
1105         }
1106
1107         inflight_info->old_free_head = free_head;
1108         *inflight_entry = old_free_head;
1109
1110         return 0;
1111 }
1112
1113 int
1114 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1115                                   uint16_t last_used_idx, uint16_t idx)
1116 {
1117         struct virtio_net *dev;
1118         struct vhost_virtqueue *vq;
1119
1120         dev = get_device(vid);
1121         if (unlikely(!dev))
1122                 return -1;
1123
1124         if (unlikely(!(dev->protocol_features &
1125             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1126                 return 0;
1127
1128         if (unlikely(vq_is_packed(dev)))
1129                 return -1;
1130
1131         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1132                 return -1;
1133
1134         vq = dev->virtqueue[vring_idx];
1135         if (unlikely(!vq))
1136                 return -1;
1137
1138         if (unlikely(!vq->inflight_split))
1139                 return -1;
1140
1141         if (unlikely(idx >= vq->size))
1142                 return -1;
1143
1144         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1145
1146         vq->inflight_split->desc[idx].inflight = 0;
1147
1148         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1149
1150         vq->inflight_split->used_idx = last_used_idx;
1151         return 0;
1152 }
1153
1154 int
1155 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1156                                    uint16_t head)
1157 {
1158         struct rte_vhost_inflight_info_packed *inflight_info;
1159         struct virtio_net *dev;
1160         struct vhost_virtqueue *vq;
1161
1162         dev = get_device(vid);
1163         if (unlikely(!dev))
1164                 return -1;
1165
1166         if (unlikely(!(dev->protocol_features &
1167             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1168                 return 0;
1169
1170         if (unlikely(!vq_is_packed(dev)))
1171                 return -1;
1172
1173         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1174                 return -1;
1175
1176         vq = dev->virtqueue[vring_idx];
1177         if (unlikely(!vq))
1178                 return -1;
1179
1180         inflight_info = vq->inflight_packed;
1181         if (unlikely(!inflight_info))
1182                 return -1;
1183
1184         if (unlikely(head >= vq->size))
1185                 return -1;
1186
1187         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1188
1189         inflight_info->desc[head].inflight = 0;
1190
1191         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1192
1193         inflight_info->old_free_head = inflight_info->free_head;
1194         inflight_info->old_used_idx = inflight_info->used_idx;
1195         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1196
1197         return 0;
1198 }
1199
1200 int
1201 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1202                                      uint16_t idx)
1203 {
1204         struct virtio_net *dev;
1205         struct vhost_virtqueue *vq;
1206
1207         dev = get_device(vid);
1208         if (unlikely(!dev))
1209                 return -1;
1210
1211         if (unlikely(!(dev->protocol_features &
1212             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1213                 return 0;
1214
1215         if (unlikely(vq_is_packed(dev)))
1216                 return -1;
1217
1218         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1219                 return -1;
1220
1221         vq = dev->virtqueue[vring_idx];
1222         if (unlikely(!vq))
1223                 return -1;
1224
1225         if (unlikely(!vq->inflight_split))
1226                 return -1;
1227
1228         if (unlikely(idx >= vq->size))
1229                 return -1;
1230
1231         vq->inflight_split->last_inflight_io = idx;
1232         return 0;
1233 }
1234
1235 int
1236 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1237                                       uint16_t head)
1238 {
1239         struct rte_vhost_inflight_info_packed *inflight_info;
1240         struct virtio_net *dev;
1241         struct vhost_virtqueue *vq;
1242         uint16_t last;
1243
1244         dev = get_device(vid);
1245         if (unlikely(!dev))
1246                 return -1;
1247
1248         if (unlikely(!(dev->protocol_features &
1249             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1250                 return 0;
1251
1252         if (unlikely(!vq_is_packed(dev)))
1253                 return -1;
1254
1255         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1256                 return -1;
1257
1258         vq = dev->virtqueue[vring_idx];
1259         if (unlikely(!vq))
1260                 return -1;
1261
1262         inflight_info = vq->inflight_packed;
1263         if (unlikely(!inflight_info))
1264                 return -1;
1265
1266         if (unlikely(head >= vq->size))
1267                 return -1;
1268
1269         last = inflight_info->desc[head].last;
1270         if (unlikely(last >= vq->size))
1271                 return -1;
1272
1273         inflight_info->desc[last].next = inflight_info->free_head;
1274         inflight_info->free_head = head;
1275         inflight_info->used_idx += inflight_info->desc[head].num;
1276         if (inflight_info->used_idx >= inflight_info->desc_num) {
1277                 inflight_info->used_idx -= inflight_info->desc_num;
1278                 inflight_info->used_wrap_counter =
1279                         !inflight_info->used_wrap_counter;
1280         }
1281
1282         return 0;
1283 }
1284
1285 int
1286 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1287 {
1288         struct virtio_net *dev;
1289         struct vhost_virtqueue *vq;
1290
1291         dev = get_device(vid);
1292         if (!dev)
1293                 return -1;
1294
1295         if (vring_idx >= VHOST_MAX_VRING)
1296                 return -1;
1297
1298         vq = dev->virtqueue[vring_idx];
1299         if (!vq)
1300                 return -1;
1301
1302         if (vq_is_packed(dev))
1303                 vhost_vring_call_packed(dev, vq);
1304         else
1305                 vhost_vring_call_split(dev, vq);
1306
1307         return 0;
1308 }
1309
1310 uint16_t
1311 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1312 {
1313         struct virtio_net *dev;
1314         struct vhost_virtqueue *vq;
1315         uint16_t ret = 0;
1316
1317         dev = get_device(vid);
1318         if (!dev)
1319                 return 0;
1320
1321         if (queue_id >= VHOST_MAX_VRING)
1322                 return 0;
1323
1324         vq = dev->virtqueue[queue_id];
1325         if (!vq)
1326                 return 0;
1327
1328         rte_spinlock_lock(&vq->access_lock);
1329
1330         if (unlikely(!vq->enabled || vq->avail == NULL))
1331                 goto out;
1332
1333         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1334
1335 out:
1336         rte_spinlock_unlock(&vq->access_lock);
1337         return ret;
1338 }
1339
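/*
 * Guest notification control for split rings: without
 * VIRTIO_RING_F_EVENT_IDX, notifications are suppressed by setting
 * VRING_USED_F_NO_NOTIFY in used->flags and re-enabled by clearing it.
 * With EVENT_IDX negotiated, enabling writes the current last_avail_idx
 * into the avail event field so the guest kicks once it makes descriptors
 * available past that index; nothing is written to disable, the event
 * field is simply left where it is. Packed rings use the equivalent
 * VRING_EVENT_F_* mechanism below.
 */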
1340 static inline int
1341 vhost_enable_notify_split(struct virtio_net *dev,
1342                 struct vhost_virtqueue *vq, int enable)
1343 {
1344         if (vq->used == NULL)
1345                 return -1;
1346
1347         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1348                 if (enable)
1349                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1350                 else
1351                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1352         } else {
1353                 if (enable)
1354                         vhost_avail_event(vq) = vq->last_avail_idx;
1355         }
1356         return 0;
1357 }
1358
1359 static inline int
1360 vhost_enable_notify_packed(struct virtio_net *dev,
1361                 struct vhost_virtqueue *vq, int enable)
1362 {
1363         uint16_t flags;
1364
1365         if (vq->device_event == NULL)
1366                 return -1;
1367
1368         if (!enable) {
1369                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1370                 return 0;
1371         }
1372
1373         flags = VRING_EVENT_F_ENABLE;
1374         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1375                 flags = VRING_EVENT_F_DESC;
1376                 vq->device_event->off_wrap = vq->last_avail_idx |
1377                         vq->avail_wrap_counter << 15;
1378         }
1379
1380         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1381
1382         vq->device_event->flags = flags;
1383         return 0;
1384 }
1385
1386 int
1387 vhost_enable_guest_notification(struct virtio_net *dev,
1388                 struct vhost_virtqueue *vq, int enable)
1389 {
1390         /*
1391          * If the virtqueue is not ready yet, the setting will be applied
1392          * when it becomes ready.
1393          */
1394         if (!vq->ready)
1395                 return 0;
1396
1397         if (vq_is_packed(dev))
1398                 return vhost_enable_notify_packed(dev, vq, enable);
1399         else
1400                 return vhost_enable_notify_split(dev, vq, enable);
1401 }
1402
1403 int
1404 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1405 {
1406         struct virtio_net *dev = get_device(vid);
1407         struct vhost_virtqueue *vq;
1408         int ret;
1409
1410         if (!dev)
1411                 return -1;
1412
1413         if (queue_id >= VHOST_MAX_VRING)
1414                 return -1;
1415
1416         vq = dev->virtqueue[queue_id];
1417         if (!vq)
1418                 return -1;
1419
1420         rte_spinlock_lock(&vq->access_lock);
1421
1422         vq->notif_enable = enable;
1423         ret = vhost_enable_guest_notification(dev, vq, enable);
1424
1425         rte_spinlock_unlock(&vq->access_lock);
1426
1427         return ret;
1428 }
1429
1430 void
1431 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1432 {
1433         struct virtio_net *dev = get_device(vid);
1434
1435         if (dev == NULL)
1436                 return;
1437
1438         vhost_log_write(dev, addr, len);
1439 }
1440
1441 void
1442 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1443                          uint64_t offset, uint64_t len)
1444 {
1445         struct virtio_net *dev;
1446         struct vhost_virtqueue *vq;
1447
1448         dev = get_device(vid);
1449         if (dev == NULL)
1450                 return;
1451
1452         if (vring_idx >= VHOST_MAX_VRING)
1453                 return;
1454         vq = dev->virtqueue[vring_idx];
1455         if (!vq)
1456                 return;
1457
1458         vhost_log_used_vring(dev, vq, offset, len);
1459 }
1460
1461 uint32_t
1462 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1463 {
1464         struct virtio_net *dev;
1465         struct vhost_virtqueue *vq;
1466         uint32_t ret = 0;
1467
1468         dev = get_device(vid);
1469         if (dev == NULL)
1470                 return 0;
1471
1472         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1473                 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1474                         dev->vid, __func__, qid);
1475                 return 0;
1476         }
1477
1478         vq = dev->virtqueue[qid];
1479         if (vq == NULL)
1480                 return 0;
1481
1482         rte_spinlock_lock(&vq->access_lock);
1483
1484         if (unlikely(!vq->enabled || vq->avail == NULL))
1485                 goto out;
1486
1487         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1488
1489 out:
1490         rte_spinlock_unlock(&vq->access_lock);
1491         return ret;
1492 }
1493
1494 struct rte_vdpa_device *
1495 rte_vhost_get_vdpa_device(int vid)
1496 {
1497         struct virtio_net *dev = get_device(vid);
1498
1499         if (dev == NULL)
1500                 return NULL;
1501
1502         return dev->vdpa_dev;
1503 }
1504
1505 int
1506 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1507                 uint64_t *log_size)
1508 {
1509         struct virtio_net *dev = get_device(vid);
1510
1511         if (dev == NULL || log_base == NULL || log_size == NULL)
1512                 return -1;
1513
1514         *log_base = dev->log_base;
1515         *log_size = dev->log_size;
1516
1517         return 0;
1518 }
1519
1520 int
1521 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1522                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1523 {
1524         struct vhost_virtqueue *vq;
1525         struct virtio_net *dev = get_device(vid);
1526
1527         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1528                 return -1;
1529
1530         if (queue_id >= VHOST_MAX_VRING)
1531                 return -1;
1532
1533         vq = dev->virtqueue[queue_id];
1534         if (!vq)
1535                 return -1;
1536
1537         if (vq_is_packed(dev)) {
1538                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1539                                   vq->last_avail_idx;
1540                 *last_used_idx = (vq->used_wrap_counter << 15) |
1541                                  vq->last_used_idx;
1542         } else {
1543                 *last_avail_idx = vq->last_avail_idx;
1544                 *last_used_idx = vq->last_used_idx;
1545         }
1546
1547         return 0;
1548 }
1549
1550 int
1551 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1552                 uint16_t last_avail_idx, uint16_t last_used_idx)
1553 {
1554         struct vhost_virtqueue *vq;
1555         struct virtio_net *dev = get_device(vid);
1556
1557         if (!dev)
1558                 return -1;
1559
1560         if (queue_id >= VHOST_MAX_VRING)
1561                 return -1;
1562
1563         vq = dev->virtqueue[queue_id];
1564         if (!vq)
1565                 return -1;
1566
1567         if (vq_is_packed(dev)) {
1568                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1569                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1570                 vq->last_used_idx = last_used_idx & 0x7fff;
1571                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1572         } else {
1573                 vq->last_avail_idx = last_avail_idx;
1574                 vq->last_used_idx = last_used_idx;
1575         }
1576
1577         return 0;
1578 }
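
/*
 * Packed ring encoding used by the two functions above: bit 15 of each
 * reported index carries the wrap counter and bits 0-14 carry the ring
 * index, which is the convention vhost-user uses for GET/SET_VRING_BASE
 * with packed virtqueues. For example, last_avail_idx = 0x8005 decodes to
 * index 5 with the avail wrap counter set.
 */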
1579
1580 int
1581 rte_vhost_get_vring_base_from_inflight(int vid,
1582                                        uint16_t queue_id,
1583                                        uint16_t *last_avail_idx,
1584                                        uint16_t *last_used_idx)
1585 {
1586         struct rte_vhost_inflight_info_packed *inflight_info;
1587         struct vhost_virtqueue *vq;
1588         struct virtio_net *dev = get_device(vid);
1589
1590         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1591                 return -1;
1592
1593         if (queue_id >= VHOST_MAX_VRING)
1594                 return -1;
1595
1596         vq = dev->virtqueue[queue_id];
1597         if (!vq)
1598                 return -1;
1599
1600         if (!vq_is_packed(dev))
1601                 return -1;
1602
1603         inflight_info = vq->inflight_packed;
1604         if (!inflight_info)
1605                 return -1;
1606
1607         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1608                           inflight_info->old_used_idx;
1609         *last_used_idx = *last_avail_idx;
1610
1611         return 0;
1612 }
1613
1614 int
1615 rte_vhost_extern_callback_register(int vid,
1616                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1617 {
1618         struct virtio_net *dev = get_device(vid);
1619
1620         if (dev == NULL || ops == NULL)
1621                 return -1;
1622
1623         dev->extern_ops = *ops;
1624         dev->extern_data = ctx;
1625         return 0;
1626 }
1627
1628 static __rte_always_inline int
1629 async_channel_register(int vid, uint16_t queue_id,
1630                 struct rte_vhost_async_channel_ops *ops)
1631 {
1632         struct virtio_net *dev = get_device(vid);
1633         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1634         struct vhost_async *async;
1635         int node = vq->numa_node;
1636
1637         if (unlikely(vq->async)) {
1638                 VHOST_LOG_CONFIG(ERR,
1639                                 "async register failed: already registered (vid %d, qid: %d)\n",
1640                                 vid, queue_id);
1641                 return -1;
1642         }
1643
1644         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1645         if (!async) {
1646                 VHOST_LOG_CONFIG(ERR, "failed to allocate async metadata (vid %d, qid: %d)\n",
1647                                 vid, queue_id);
1648                 return -1;
1649         }
1650
1651         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1652                         RTE_CACHE_LINE_SIZE, node);
1653         if (!async->pkts_info) {
1654                 VHOST_LOG_CONFIG(ERR, "failed to allocate async_pkts_info (vid %d, qid: %d)\n",
1655                                 vid, queue_id);
1656                 goto out_free_async;
1657         }
1658
1659         if (vq_is_packed(dev)) {
1660                 async->buffers_packed = rte_malloc_socket(NULL,
1661                                 vq->size * sizeof(struct vring_used_elem_packed),
1662                                 RTE_CACHE_LINE_SIZE, node);
1663                 if (!async->buffers_packed) {
1664                         VHOST_LOG_CONFIG(ERR, "failed to allocate async buffers (vid %d, qid: %d)\n",
1665                                         vid, queue_id);
1666                         goto out_free_inflight;
1667                 }
1668         } else {
1669                 async->descs_split = rte_malloc_socket(NULL,
1670                                 vq->size * sizeof(struct vring_used_elem),
1671                                 RTE_CACHE_LINE_SIZE, node);
1672                 if (!async->descs_split) {
1673                         VHOST_LOG_CONFIG(ERR, "failed to allocate async descs (vid %d, qid: %d)\n",
1674                                         vid, queue_id);
1675                         goto out_free_inflight;
1676                 }
1677         }
1678
1679         async->ops.check_completed_copies = ops->check_completed_copies;
1680         async->ops.transfer_data = ops->transfer_data;
1681
1682         vq->async = async;
1683
1684         return 0;
1685 out_free_inflight:
1686         rte_free(async->pkts_info);
1687 out_free_async:
1688         rte_free(async);
1689
1690         return -1;
1691 }
1692
1693 int
1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1695                 struct rte_vhost_async_config config,
1696                 struct rte_vhost_async_channel_ops *ops)
1697 {
1698         struct vhost_virtqueue *vq;
1699         struct virtio_net *dev = get_device(vid);
1700         int ret;
1701
1702         if (dev == NULL || ops == NULL)
1703                 return -1;
1704
1705         if (queue_id >= VHOST_MAX_VRING)
1706                 return -1;
1707
1708         vq = dev->virtqueue[queue_id];
1709
1710         if (unlikely(vq == NULL || !dev->async_copy))
1711                 return -1;
1712
1713         if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1714                 VHOST_LOG_CONFIG(ERR,
1715                         "async copy is not supported in non-inorder mode "
1716                         "(vid %d, qid: %d)\n", vid, queue_id);
1717                 return -1;
1718         }
1719
1720         if (unlikely(ops->check_completed_copies == NULL ||
1721                 ops->transfer_data == NULL))
1722                 return -1;
1723
1724         rte_spinlock_lock(&vq->access_lock);
1725         ret = async_channel_register(vid, queue_id, ops);
1726         rte_spinlock_unlock(&vq->access_lock);
1727
1728         return ret;
1729 }
1730
1731 int
1732 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
1733                 struct rte_vhost_async_config config,
1734                 struct rte_vhost_async_channel_ops *ops)
1735 {
1736         struct vhost_virtqueue *vq;
1737         struct virtio_net *dev = get_device(vid);
1738
1739         if (dev == NULL || ops == NULL)
1740                 return -1;
1741
1742         if (queue_id >= VHOST_MAX_VRING)
1743                 return -1;
1744
1745         vq = dev->virtqueue[queue_id];
1746
1747         if (unlikely(vq == NULL || !dev->async_copy))
1748                 return -1;
1749
1750         if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1751                 VHOST_LOG_CONFIG(ERR,
1752                         "async copy is not supported in non-inorder mode "
1753                         "(vid %d, qid: %d)\n", vid, queue_id);
1754                 return -1;
1755         }
1756
1757         if (unlikely(ops->check_completed_copies == NULL ||
1758                 ops->transfer_data == NULL))
1759                 return -1;
1760
1761         return async_channel_register(vid, queue_id, ops);
1762 }
1763
1764 int
1765 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1766 {
1767         struct vhost_virtqueue *vq;
1768         struct virtio_net *dev = get_device(vid);
1769         int ret = -1;
1770
1771         if (dev == NULL)
1772                 return ret;
1773
1774         if (queue_id >= VHOST_MAX_VRING)
1775                 return ret;
1776
1777         vq = dev->virtqueue[queue_id];
1778
1779         if (vq == NULL)
1780                 return ret;
1781
1782         ret = 0;
1783
1784         if (!vq->async)
1785                 return ret;
1786
1787         if (!rte_spinlock_trylock(&vq->access_lock)) {
1788                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1789                         "virtqueue is busy.\n");
1790                 return -1;
1791         }
1792
1793         if (vq->async->pkts_inflight_n) {
1794                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1795                         "async inflight packets must be completed before unregistration.\n");
1796                 ret = -1;
1797                 goto out;
1798         }
1799
1800         vhost_free_async_mem(vq);
1801 out:
1802         rte_spinlock_unlock(&vq->access_lock);
1803
1804         return ret;
1805 }
1806
1807 int
1808 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1809 {
1810         struct vhost_virtqueue *vq;
1811         struct virtio_net *dev = get_device(vid);
1812
1813         if (dev == NULL)
1814                 return -1;
1815
1816         if (queue_id >= VHOST_MAX_VRING)
1817                 return -1;
1818
1819         vq = dev->virtqueue[queue_id];
1820
1821         if (vq == NULL)
1822                 return -1;
1823
1824         if (!vq->async)
1825                 return 0;
1826
1827         if (vq->async->pkts_inflight_n) {
1828                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1829                         "async inflight packets must be completed before unregistration.\n");
1830                 return -1;
1831         }
1832
1833         vhost_free_async_mem(vq);
1834
1835         return 0;
1836 }
1837
1838 int
1839 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1840 {
1841         struct vhost_virtqueue *vq;
1842         struct virtio_net *dev = get_device(vid);
1843         int ret = -1;
1844
1845         if (dev == NULL)
1846                 return ret;
1847
1848         if (queue_id >= VHOST_MAX_VRING)
1849                 return ret;
1850
1851         vq = dev->virtqueue[queue_id];
1852
1853         if (vq == NULL)
1854                 return ret;
1855
1856         if (!vq->async)
1857                 return ret;
1858
1859         if (!rte_spinlock_trylock(&vq->access_lock)) {
1860                 VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. "
1861                         "virtqueue is busy.\n");
1862                 return ret;
1863         }
1864
1865         ret = vq->async->pkts_inflight_n;
1866         rte_spinlock_unlock(&vq->access_lock);
1867
1868         return ret;
1869 }
1870
1871 int
1872 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1873                 struct rte_vhost_power_monitor_cond *pmc)
1874 {
1875         struct virtio_net *dev = get_device(vid);
1876         struct vhost_virtqueue *vq;
1877
1878         if (dev == NULL)
1879                 return -1;
1880         if (queue_id >= VHOST_MAX_VRING)
1881                 return -1;
1882
1883         vq = dev->virtqueue[queue_id];
1884         if (vq == NULL)
1885                 return -1;
1886
1887         if (vq_is_packed(dev)) {
1888                 struct vring_packed_desc *desc;
1889                 desc = vq->desc_packed;
1890                 pmc->addr = &desc[vq->last_avail_idx].flags;
1891                 if (vq->avail_wrap_counter)
1892                         pmc->val = VRING_DESC_F_AVAIL;
1893                 else
1894                         pmc->val = VRING_DESC_F_USED;
1895                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1896                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1897                 pmc->match = 1;
1898         } else {
1899                 pmc->addr = &vq->avail->idx;
1900                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1901                 pmc->mask = vq->size - 1;
1902                 pmc->size = sizeof(vq->avail->idx);
1903                 pmc->match = 0;
1904         }
1905
1906         return 0;
1907 }
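
/*
 * The condition filled in above lets the caller (typically an ethdev PMD
 * built on top of vhost) wait for new work with a power-optimized monitor
 * primitive instead of busy polling: for a packed ring it watches the flags
 * of the next descriptor (the AVAIL/USED bits relative to the current wrap
 * counter), for a split ring it watches the avail index moving past
 * last_avail_idx.
 */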
1908
1909 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1910 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);