lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
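/*
 * Table of all vhost devices, indexed by vid.
 * vhost_dev_lock serializes slot allocation in vhost_new_device().
 */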
24 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
25 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
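/*
 * Usage sketch for the IOVA to host VA translation below: callers
 * normally go through the vhost_iova_to_vva() inline wrapper from
 * vhost.h, which only ends up in __vhost_iova_to_vva() when
 * VIRTIO_F_IOMMU_PLATFORM has been negotiated.  Names such as
 * needed_len are placeholders, not symbols from this file:
 *
 *	uint64_t len = needed_len;
 *	uint64_t vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);
 *	if (!vva || len != needed_len)
 *		return -1;	(no mapping yet, or mapping not contiguous)
 */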
27 /* Called with iotlb_lock read-locked */
28 uint64_t
29 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
30                     uint64_t iova, uint64_t *size, uint8_t perm)
31 {
32         uint64_t vva, tmp_size;
33
34         if (unlikely(!*size))
35                 return 0;
36
37         tmp_size = *size;
38
39         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
40         if (tmp_size == *size)
41                 return vva;
42
43         iova += tmp_size;
44
45         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
46                 /*
47                  * iotlb_lock is read-locked for a full burst,
48                  * but it only protects the iotlb cache.
49                  * In case of IOTLB miss, we might block on the socket,
50                  * which could cause a deadlock with QEMU if an IOTLB update
51                  * is being handled. We can safely unlock here to avoid it.
52                  */
53                 vhost_user_iotlb_rd_unlock(vq);
54
55                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
56                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
57                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
58                                 dev->ifname, iova);
59                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
60                 }
61
62                 vhost_user_iotlb_rd_lock(vq);
63         }
64
65         return 0;
66 }
67
68 #define VHOST_LOG_PAGE  4096
69
70 /*
71  * Atomically set a bit in memory.
72  */
73 static __rte_always_inline void
74 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
75 {
76 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
77         /*
78          * __sync_ built-ins are deprecated, but __atomic_ ones
79          * are poorly optimized in GCC versions older than 7.1.
80          */
81         __sync_fetch_and_or_1(addr, (1U << nr));
82 #else
83         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
84 #endif
85 }
86
87 static __rte_always_inline void
88 vhost_log_page(uint8_t *log_base, uint64_t page)
89 {
90         vhost_set_bit(page % 8, &log_base[page / 8]);
91 }
92
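/*
 * Dirty page logging for live migration: log_base points to a bitmap
 * shared with the front-end in which bit N marks guest-physical page N
 * (VHOST_LOG_PAGE bytes) as dirty.  For a given address:
 * page = addr / VHOST_LOG_PAGE, byte = page / 8, bit = page % 8.
 */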
93 void
94 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
95 {
96         uint64_t page;
97
98         if (unlikely(!dev->log_base || !len))
99                 return;
100
101         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
102                 return;
103
104         /* To make sure guest memory updates are committed before logging */
105         rte_atomic_thread_fence(__ATOMIC_RELEASE);
106
107         page = addr / VHOST_LOG_PAGE;
108         while (page * VHOST_LOG_PAGE < addr + len) {
109                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
110                 page += 1;
111         }
112 }
113
114 void
115 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
116                              uint64_t iova, uint64_t len)
117 {
118         uint64_t hva, gpa, map_len;
119         map_len = len;
120
121         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
122         if (map_len != len) {
123                 VHOST_LOG_DATA(ERR,
124                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
125                         dev->ifname, iova);
126                 return;
127         }
128
129         gpa = hva_to_gpa(dev, hva, len);
130         if (gpa)
131                 __vhost_log_write(dev, gpa, len);
132 }
133
134 void
135 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
136 {
137         unsigned long *log_base;
138         int i;
139
140         if (unlikely(!dev->log_base))
141                 return;
142
143         /* No cache, nothing to sync */
144         if (unlikely(!vq->log_cache))
145                 return;
146
147         rte_atomic_thread_fence(__ATOMIC_RELEASE);
148
149         log_base = (unsigned long *)(uintptr_t)dev->log_base;
150
151         for (i = 0; i < vq->log_cache_nb_elem; i++) {
152                 struct log_cache_entry *elem = vq->log_cache + i;
153
154 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
155                 /*
156                  * '__sync' builtins are deprecated, but '__atomic' ones
157                  * are poorly optimized in GCC versions older than 7.1.
158                  */
159                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
160 #else
161                 __atomic_fetch_or(log_base + elem->offset, elem->val,
162                                 __ATOMIC_RELAXED);
163 #endif
164         }
165
166         rte_atomic_thread_fence(__ATOMIC_RELEASE);
167
168         vq->log_cache_nb_elem = 0;
169 }
170
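/*
 * Per-virtqueue dirty log cache: instead of issuing one atomic OR in
 * the shared bitmap per dirty page, bits are first accumulated per
 * unsigned long word in up to VHOST_LOG_CACHE_NR cache entries, then
 * flushed to the shared bitmap by __vhost_log_cache_sync() above.
 */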
171 static __rte_always_inline void
172 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
173                         uint64_t page)
174 {
175         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
176         uint32_t offset = page / (sizeof(unsigned long) << 3);
177         int i;
178
179         if (unlikely(!vq->log_cache)) {
180                 /* No logging cache allocated, write dirty log map directly */
181                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
182                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
183
184                 return;
185         }
186
187         for (i = 0; i < vq->log_cache_nb_elem; i++) {
188                 struct log_cache_entry *elem = vq->log_cache + i;
189
190                 if (elem->offset == offset) {
191                         elem->val |= (1UL << bit_nr);
192                         return;
193                 }
194         }
195
196         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
197                 /*
198                  * No more room for a new log cache entry,
199                  * so write the dirty log map directly.
200                  */
201                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
202                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
203
204                 return;
205         }
206
207         vq->log_cache[i].offset = offset;
208         vq->log_cache[i].val = (1UL << bit_nr);
209         vq->log_cache_nb_elem++;
210 }
211
212 void
213 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
214                         uint64_t addr, uint64_t len)
215 {
216         uint64_t page;
217
218         if (unlikely(!dev->log_base || !len))
219                 return;
220
221         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
222                 return;
223
224         page = addr / VHOST_LOG_PAGE;
225         while (page * VHOST_LOG_PAGE < addr + len) {
226                 vhost_log_cache_page(dev, vq, page);
227                 page += 1;
228         }
229 }
230
231 void
232 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
233                              uint64_t iova, uint64_t len)
234 {
235         uint64_t hva, gpa, map_len;
236         map_len = len;
237
238         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
239         if (map_len != len) {
240                 VHOST_LOG_DATA(ERR,
241                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
242                         dev->ifname, iova);
243                 return;
244         }
245
246         gpa = hva_to_gpa(dev, hva, len);
247         if (gpa)
248                 __vhost_log_cache_write(dev, vq, gpa, len);
249 }
250
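/*
 * Copy a guest indirect descriptor table into a freshly allocated host
 * buffer, chunk by chunk, as the IOVA range may not be contiguous in
 * host virtual memory.  Returns NULL on allocation or translation
 * failure; the caller is responsible for rte_free()'ing the copy.
 */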
251 void *
252 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
253                 uint64_t desc_addr, uint64_t desc_len)
254 {
255         void *idesc;
256         uint64_t src, dst;
257         uint64_t len, remain = desc_len;
258
259         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
260         if (unlikely(!idesc))
261                 return NULL;
262
263         dst = (uint64_t)(uintptr_t)idesc;
264
265         while (remain) {
266                 len = remain;
267                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
268                                 VHOST_ACCESS_RO);
269                 if (unlikely(!src || !len)) {
270                         rte_free(idesc);
271                         return NULL;
272                 }
273
274                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
275
276                 remain -= len;
277                 dst += len;
278                 desc_addr += len;
279         }
280
281         return idesc;
282 }
283
284 void
285 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
286 {
287         if ((vq->callfd >= 0) && (destroy != 0))
288                 close(vq->callfd);
289         if (vq->kickfd >= 0)
290                 close(vq->kickfd);
291 }
292
293 void
294 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
295 {
296         if (!(dev->protocol_features &
297             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
298                 return;
299
300         if (vq_is_packed(dev)) {
301                 if (vq->inflight_packed)
302                         vq->inflight_packed = NULL;
303         } else {
304                 if (vq->inflight_split)
305                         vq->inflight_split = NULL;
306         }
307
308         if (vq->resubmit_inflight) {
309                 if (vq->resubmit_inflight->resubmit_list) {
310                         rte_free(vq->resubmit_inflight->resubmit_list);
311                         vq->resubmit_inflight->resubmit_list = NULL;
312                 }
313                 rte_free(vq->resubmit_inflight);
314                 vq->resubmit_inflight = NULL;
315         }
316 }
317
318 /*
319  * Unmap any memory, close any file descriptors and
320  * free any memory owned by a device.
321  */
322 void
323 cleanup_device(struct virtio_net *dev, int destroy)
324 {
325         uint32_t i;
326
327         vhost_backend_cleanup(dev);
328
329         for (i = 0; i < dev->nr_vring; i++) {
330                 cleanup_vq(dev->virtqueue[i], destroy);
331                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
332         }
333 }
334
335 static void
336 vhost_free_async_mem(struct vhost_virtqueue *vq)
337 {
338         if (!vq->async)
339                 return;
340
341         rte_free(vq->async->pkts_info);
342         rte_free(vq->async->pkts_cmpl_flag);
343
344         rte_free(vq->async->buffers_packed);
345         vq->async->buffers_packed = NULL;
346         rte_free(vq->async->descs_split);
347         vq->async->descs_split = NULL;
348
349         rte_free(vq->async);
350         vq->async = NULL;
351 }
352
353 void
354 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
355 {
356         if (vq_is_packed(dev))
357                 rte_free(vq->shadow_used_packed);
358         else
359                 rte_free(vq->shadow_used_split);
360
361         vhost_free_async_mem(vq);
362         rte_free(vq->batch_copy_elems);
363         rte_mempool_free(vq->iotlb_pool);
364         rte_free(vq->log_cache);
365         rte_free(vq);
366 }
367
368 /*
369  * Release virtqueues and device memory.
370  */
371 static void
372 free_device(struct virtio_net *dev)
373 {
374         uint32_t i;
375
376         for (i = 0; i < dev->nr_vring; i++)
377                 free_vq(dev, dev->virtqueue[i]);
378
379         rte_free(dev);
380 }
381
382 static __rte_always_inline int
383 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
384 {
385         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
386                 return 0;
387
388         vq->log_guest_addr = translate_log_addr(dev, vq,
389                                                 vq->ring_addrs.log_guest_addr);
390         if (vq->log_guest_addr == 0)
391                 return -1;
392
393         return 0;
394 }
395
396 /*
397  * Converts vring log address to GPA
398  * If IOMMU is enabled, the log address is an IOVA.
399  * If IOMMU is not enabled, the log address is already a GPA.
400  *
401  * Caller should have iotlb_lock read-locked
402  */
403 uint64_t
404 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
405                 uint64_t log_addr)
406 {
407         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
408                 const uint64_t exp_size = sizeof(uint64_t);
409                 uint64_t hva, gpa;
410                 uint64_t size = exp_size;
411
412                 hva = vhost_iova_to_vva(dev, vq, log_addr,
413                                         &size, VHOST_ACCESS_RW);
414
415                 if (size != exp_size)
416                         return 0;
417
418                 gpa = hva_to_gpa(dev, hva, exp_size);
419                 if (!gpa) {
420                         VHOST_LOG_DATA(ERR,
421                                 "(%s) failed to find GPA for log_addr: 0x%"
422                                 PRIx64 " hva: 0x%" PRIx64 "\n",
423                                 dev->ifname, log_addr, hva);
424                         return 0;
425                 }
426                 return gpa;
427
428         } else
429                 return log_addr;
430 }
431
432 /* Caller should have iotlb_lock read-locked */
433 static int
434 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
435 {
436         uint64_t req_size, size;
437
438         req_size = sizeof(struct vring_desc) * vq->size;
439         size = req_size;
440         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
441                                                 vq->ring_addrs.desc_user_addr,
442                                                 &size, VHOST_ACCESS_RW);
443         if (!vq->desc || size != req_size)
444                 return -1;
445
446         req_size = sizeof(struct vring_avail);
447         req_size += sizeof(uint16_t) * vq->size;
448         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
449                 req_size += sizeof(uint16_t);
450         size = req_size;
451         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
452                                                 vq->ring_addrs.avail_user_addr,
453                                                 &size, VHOST_ACCESS_RW);
454         if (!vq->avail || size != req_size)
455                 return -1;
456
457         req_size = sizeof(struct vring_used);
458         req_size += sizeof(struct vring_used_elem) * vq->size;
459         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
460                 req_size += sizeof(uint16_t);
461         size = req_size;
462         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
463                                                 vq->ring_addrs.used_user_addr,
464                                                 &size, VHOST_ACCESS_RW);
465         if (!vq->used || size != req_size)
466                 return -1;
467
468         return 0;
469 }
470
471 /* Caller should have iotlb_lock read-locked */
472 static int
473 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
474 {
475         uint64_t req_size, size;
476
477         req_size = sizeof(struct vring_packed_desc) * vq->size;
478         size = req_size;
479         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
480                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
481                                 &size, VHOST_ACCESS_RW);
482         if (!vq->desc_packed || size != req_size)
483                 return -1;
484
485         req_size = sizeof(struct vring_packed_desc_event);
486         size = req_size;
487         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
488                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
489                                 &size, VHOST_ACCESS_RW);
490         if (!vq->driver_event || size != req_size)
491                 return -1;
492
493         req_size = sizeof(struct vring_packed_desc_event);
494         size = req_size;
495         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
496                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
497                                 &size, VHOST_ACCESS_RW);
498         if (!vq->device_event || size != req_size)
499                 return -1;
500
501         return 0;
502 }
503
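/*
 * Translate the ring addresses through the IOTLB and mark the virtqueue
 * as safe to access (vq->access_ok).  When VIRTIO_F_IOMMU_PLATFORM is
 * not negotiated, there is nothing to translate here: the ring
 * addresses were already translated when they were set.
 */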
504 int
505 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
506 {
507         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
508                 goto out;
509
510         if (vq_is_packed(dev)) {
511                 if (vring_translate_packed(dev, vq) < 0)
512                         return -1;
513         } else {
514                 if (vring_translate_split(dev, vq) < 0)
515                         return -1;
516         }
517
518         if (log_translate(dev, vq) < 0)
519                 return -1;
520
521         vq->access_ok = true;
522
523 out:
524         return 0;
525 }
526
527 void
528 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
529 {
530         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
531                 vhost_user_iotlb_wr_lock(vq);
532
533         vq->access_ok = false;
534         vq->desc = NULL;
535         vq->avail = NULL;
536         vq->used = NULL;
537         vq->log_guest_addr = 0;
538
539         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
540                 vhost_user_iotlb_wr_unlock(vq);
541 }
542
543 static void
544 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
545 {
546         struct vhost_virtqueue *vq;
547         int numa_node = SOCKET_ID_ANY;
548
549         if (vring_idx >= VHOST_MAX_VRING) {
550                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
551                                 dev->ifname, vring_idx);
552                 return;
553         }
554
555         vq = dev->virtqueue[vring_idx];
556         if (!vq) {
557                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
558                                 dev->ifname, vring_idx);
559                 return;
560         }
561
562         memset(vq, 0, sizeof(struct vhost_virtqueue));
563
564         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
565         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
566         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
567
568 #ifdef RTE_LIBRTE_VHOST_NUMA
569         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
570                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
571                         dev->ifname, rte_strerror(errno));
572                 numa_node = SOCKET_ID_ANY;
573         }
574 #endif
575         vq->numa_node = numa_node;
576
577         vhost_user_iotlb_init(dev, vring_idx);
578 }
579
580 static void
581 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
582 {
583         struct vhost_virtqueue *vq;
584         int callfd;
585
586         if (vring_idx >= VHOST_MAX_VRING) {
587                 VHOST_LOG_CONFIG(ERR,
588                                 "(%s) failed to reset vring, out of bound (%d)\n",
589                                 dev->ifname, vring_idx);
590                 return;
591         }
592
593         vq = dev->virtqueue[vring_idx];
594         if (!vq) {
595                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
596                                 dev->ifname, vring_idx);
597                 return;
598         }
599
600         callfd = vq->callfd;
601         init_vring_queue(dev, vring_idx);
602         vq->callfd = callfd;
603 }
604
605 int
606 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
607 {
608         struct vhost_virtqueue *vq;
609         uint32_t i;
610
611         /* Also allocate holes, if any, up to the requested vring index. */
612         for (i = 0; i <= vring_idx; i++) {
613                 if (dev->virtqueue[i])
614                         continue;
615
616                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
617                 if (vq == NULL) {
618                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
619                                         dev->ifname, i);
620                         return -1;
621                 }
622
623                 dev->virtqueue[i] = vq;
624                 init_vring_queue(dev, i);
625                 rte_spinlock_init(&vq->access_lock);
626                 vq->avail_wrap_counter = 1;
627                 vq->used_wrap_counter = 1;
628                 vq->signalled_used_valid = false;
629         }
630
631         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
632
633         return 0;
634 }
635
636 /*
637  * Reset some variables in the device structure, while keeping a few
638  * others untouched, such as vid, ifname and nr_vring: they
639  * should remain the same unless the device is removed.
640  */
641 void
642 reset_device(struct virtio_net *dev)
643 {
644         uint32_t i;
645
646         dev->features = 0;
647         dev->protocol_features = 0;
648         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
649
650         for (i = 0; i < dev->nr_vring; i++)
651                 reset_vring_queue(dev, i);
652 }
653
654 /*
655  * Invoked when a new vhost-user connection is established (i.e. when
656  * a new virtio device is being attached).
657  */
658 int
659 vhost_new_device(void)
660 {
661         struct virtio_net *dev;
662         int i;
663
664         pthread_mutex_lock(&vhost_dev_lock);
665         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
666                 if (vhost_devices[i] == NULL)
667                         break;
668         }
669
670         if (i == RTE_MAX_VHOST_DEVICE) {
671                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
672                 pthread_mutex_unlock(&vhost_dev_lock);
673                 return -1;
674         }
675
676         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
677         if (dev == NULL) {
678                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
679                 pthread_mutex_unlock(&vhost_dev_lock);
680                 return -1;
681         }
682
683         vhost_devices[i] = dev;
684         pthread_mutex_unlock(&vhost_dev_lock);
685
686         dev->vid = i;
687         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
688         dev->slave_req_fd = -1;
689         dev->postcopy_ufd = -1;
690         rte_spinlock_init(&dev->slave_req_lock);
691
692         return i;
693 }
694
695 void
696 vhost_destroy_device_notify(struct virtio_net *dev)
697 {
698         struct rte_vdpa_device *vdpa_dev;
699
700         if (dev->flags & VIRTIO_DEV_RUNNING) {
701                 vdpa_dev = dev->vdpa_dev;
702                 if (vdpa_dev)
703                         vdpa_dev->ops->dev_close(dev->vid);
704                 dev->flags &= ~VIRTIO_DEV_RUNNING;
705                 dev->notify_ops->destroy_device(dev->vid);
706         }
707 }
708
709 /*
710  * Invoked when the vhost-user connection is broken (i.e. when
711  * the virtio device is being detached).
712  */
713 void
714 vhost_destroy_device(int vid)
715 {
716         struct virtio_net *dev = get_device(vid);
717
718         if (dev == NULL)
719                 return;
720
721         vhost_destroy_device_notify(dev);
722
723         cleanup_device(dev, 1);
724         free_device(dev);
725
726         vhost_devices[vid] = NULL;
727 }
728
729 void
730 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
731 {
732         struct virtio_net *dev = get_device(vid);
733
734         if (dev == NULL)
735                 return;
736
737         dev->vdpa_dev = vdpa_dev;
738 }
739
740 void
741 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
742 {
743         struct virtio_net *dev;
744         unsigned int len;
745
746         dev = get_device(vid);
747         if (dev == NULL)
748                 return;
749
750         len = if_len > sizeof(dev->ifname) ?
751                 sizeof(dev->ifname) : if_len;
752
753         strncpy(dev->ifname, if_name, len);
754         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
755 }
756
757 void
758 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
759 {
760         struct virtio_net *dev = get_device(vid);
761
762         if (dev == NULL)
763                 return;
764
765         if (enable)
766                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
767         else
768                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
769         if (!compliant_ol_flags)
770                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
771         else
772                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
773 }
774
775 void
776 vhost_enable_extbuf(int vid)
777 {
778         struct virtio_net *dev = get_device(vid);
779
780         if (dev == NULL)
781                 return;
782
783         dev->extbuf = 1;
784 }
785
786 void
787 vhost_enable_linearbuf(int vid)
788 {
789         struct virtio_net *dev = get_device(vid);
790
791         if (dev == NULL)
792                 return;
793
794         dev->linearbuf = 1;
795 }
796
797 int
798 rte_vhost_get_mtu(int vid, uint16_t *mtu)
799 {
800         struct virtio_net *dev = get_device(vid);
801
802         if (dev == NULL || mtu == NULL)
803                 return -ENODEV;
804
805         if (!(dev->flags & VIRTIO_DEV_READY))
806                 return -EAGAIN;
807
808         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
809                 return -ENOTSUP;
810
811         *mtu = dev->mtu;
812
813         return 0;
814 }
815
816 int
817 rte_vhost_get_numa_node(int vid)
818 {
819 #ifdef RTE_LIBRTE_VHOST_NUMA
820         struct virtio_net *dev = get_device(vid);
821         int numa_node;
822         int ret;
823
824         if (dev == NULL || numa_available() != 0)
825                 return -1;
826
827         ret = get_mempolicy(&numa_node, NULL, 0, dev,
828                             MPOL_F_NODE | MPOL_F_ADDR);
829         if (ret < 0) {
830                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
831                         dev->ifname, rte_strerror(errno));
832                 return -1;
833         }
834
835         return numa_node;
836 #else
837         RTE_SET_USED(vid);
838         return -1;
839 #endif
840 }
841
842 uint32_t
843 rte_vhost_get_queue_num(int vid)
844 {
845         struct virtio_net *dev = get_device(vid);
846
847         if (dev == NULL)
848                 return 0;
849
850         return dev->nr_vring / 2;
851 }
852
853 uint16_t
854 rte_vhost_get_vring_num(int vid)
855 {
856         struct virtio_net *dev = get_device(vid);
857
858         if (dev == NULL)
859                 return 0;
860
861         return dev->nr_vring;
862 }
863
864 int
865 rte_vhost_get_ifname(int vid, char *buf, size_t len)
866 {
867         struct virtio_net *dev = get_device(vid);
868
869         if (dev == NULL || buf == NULL)
870                 return -1;
871
872         len = RTE_MIN(len, sizeof(dev->ifname));
873
874         strncpy(buf, dev->ifname, len);
875         buf[len - 1] = '\0';
876
877         return 0;
878 }
879
880 int
881 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
882 {
883         struct virtio_net *dev;
884
885         dev = get_device(vid);
886         if (dev == NULL || features == NULL)
887                 return -1;
888
889         *features = dev->features;
890         return 0;
891 }
892
893 int
894 rte_vhost_get_negotiated_protocol_features(int vid,
895                                            uint64_t *protocol_features)
896 {
897         struct virtio_net *dev;
898
899         dev = get_device(vid);
900         if (dev == NULL || protocol_features == NULL)
901                 return -1;
902
903         *protocol_features = dev->protocol_features;
904         return 0;
905 }
906
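/*
 * Return a snapshot of the guest memory regions.  The table is
 * allocated with malloc() and is expected to be released by the caller
 * with free() once it is no longer needed.
 */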
907 int
908 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
909 {
910         struct virtio_net *dev;
911         struct rte_vhost_memory *m;
912         size_t size;
913
914         dev = get_device(vid);
915         if (dev == NULL || mem == NULL)
916                 return -1;
917
918         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
919         m = malloc(sizeof(struct rte_vhost_memory) + size);
920         if (!m)
921                 return -1;
922
923         m->nregions = dev->mem->nregions;
924         memcpy(m->regions, dev->mem->regions, size);
925         *mem = m;
926
927         return 0;
928 }
929
930 int
931 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
932                           struct rte_vhost_vring *vring)
933 {
934         struct virtio_net *dev;
935         struct vhost_virtqueue *vq;
936
937         dev = get_device(vid);
938         if (dev == NULL || vring == NULL)
939                 return -1;
940
941         if (vring_idx >= VHOST_MAX_VRING)
942                 return -1;
943
944         vq = dev->virtqueue[vring_idx];
945         if (!vq)
946                 return -1;
947
948         if (vq_is_packed(dev)) {
949                 vring->desc_packed = vq->desc_packed;
950                 vring->driver_event = vq->driver_event;
951                 vring->device_event = vq->device_event;
952         } else {
953                 vring->desc = vq->desc;
954                 vring->avail = vq->avail;
955                 vring->used = vq->used;
956         }
957         vring->log_guest_addr  = vq->log_guest_addr;
958
959         vring->callfd  = vq->callfd;
960         vring->kickfd  = vq->kickfd;
961         vring->size    = vq->size;
962
963         return 0;
964 }
965
966 int
967 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
968                                   struct rte_vhost_ring_inflight *vring)
969 {
970         struct virtio_net *dev;
971         struct vhost_virtqueue *vq;
972
973         dev = get_device(vid);
974         if (unlikely(!dev))
975                 return -1;
976
977         if (vring_idx >= VHOST_MAX_VRING)
978                 return -1;
979
980         vq = dev->virtqueue[vring_idx];
981         if (unlikely(!vq))
982                 return -1;
983
984         if (vq_is_packed(dev)) {
985                 if (unlikely(!vq->inflight_packed))
986                         return -1;
987
988                 vring->inflight_packed = vq->inflight_packed;
989         } else {
990                 if (unlikely(!vq->inflight_split))
991                         return -1;
992
993                 vring->inflight_split = vq->inflight_split;
994         }
995
996         vring->resubmit_inflight = vq->resubmit_inflight;
997
998         return 0;
999 }
1000
1001 int
1002 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1003                                   uint16_t idx)
1004 {
1005         struct vhost_virtqueue *vq;
1006         struct virtio_net *dev;
1007
1008         dev = get_device(vid);
1009         if (unlikely(!dev))
1010                 return -1;
1011
1012         if (unlikely(!(dev->protocol_features &
1013             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1014                 return 0;
1015
1016         if (unlikely(vq_is_packed(dev)))
1017                 return -1;
1018
1019         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1020                 return -1;
1021
1022         vq = dev->virtqueue[vring_idx];
1023         if (unlikely(!vq))
1024                 return -1;
1025
1026         if (unlikely(!vq->inflight_split))
1027                 return -1;
1028
1029         if (unlikely(idx >= vq->size))
1030                 return -1;
1031
1032         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1033         vq->inflight_split->desc[idx].inflight = 1;
1034         return 0;
1035 }
1036
1037 int
1038 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1039                                    uint16_t head, uint16_t last,
1040                                    uint16_t *inflight_entry)
1041 {
1042         struct rte_vhost_inflight_info_packed *inflight_info;
1043         struct virtio_net *dev;
1044         struct vhost_virtqueue *vq;
1045         struct vring_packed_desc *desc;
1046         uint16_t old_free_head, free_head;
1047
1048         dev = get_device(vid);
1049         if (unlikely(!dev))
1050                 return -1;
1051
1052         if (unlikely(!(dev->protocol_features &
1053             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1054                 return 0;
1055
1056         if (unlikely(!vq_is_packed(dev)))
1057                 return -1;
1058
1059         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1060                 return -1;
1061
1062         vq = dev->virtqueue[vring_idx];
1063         if (unlikely(!vq))
1064                 return -1;
1065
1066         inflight_info = vq->inflight_packed;
1067         if (unlikely(!inflight_info))
1068                 return -1;
1069
1070         if (unlikely(head >= vq->size))
1071                 return -1;
1072
1073         desc = vq->desc_packed;
1074         old_free_head = inflight_info->old_free_head;
1075         if (unlikely(old_free_head >= vq->size))
1076                 return -1;
1077
1078         free_head = old_free_head;
1079
1080         /* init header descriptor */
1081         inflight_info->desc[old_free_head].num = 0;
1082         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1083         inflight_info->desc[old_free_head].inflight = 1;
1084
1085         /* save desc entry in flight entry */
1086         while (head != ((last + 1) % vq->size)) {
1087                 inflight_info->desc[old_free_head].num++;
1088                 inflight_info->desc[free_head].addr = desc[head].addr;
1089                 inflight_info->desc[free_head].len = desc[head].len;
1090                 inflight_info->desc[free_head].flags = desc[head].flags;
1091                 inflight_info->desc[free_head].id = desc[head].id;
1092
1093                 inflight_info->desc[old_free_head].last = free_head;
1094                 free_head = inflight_info->desc[free_head].next;
1095                 inflight_info->free_head = free_head;
1096                 head = (head + 1) % vq->size;
1097         }
1098
1099         inflight_info->old_free_head = free_head;
1100         *inflight_entry = old_free_head;
1101
1102         return 0;
1103 }
1104
1105 int
1106 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1107                                   uint16_t last_used_idx, uint16_t idx)
1108 {
1109         struct virtio_net *dev;
1110         struct vhost_virtqueue *vq;
1111
1112         dev = get_device(vid);
1113         if (unlikely(!dev))
1114                 return -1;
1115
1116         if (unlikely(!(dev->protocol_features &
1117             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1118                 return 0;
1119
1120         if (unlikely(vq_is_packed(dev)))
1121                 return -1;
1122
1123         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1124                 return -1;
1125
1126         vq = dev->virtqueue[vring_idx];
1127         if (unlikely(!vq))
1128                 return -1;
1129
1130         if (unlikely(!vq->inflight_split))
1131                 return -1;
1132
1133         if (unlikely(idx >= vq->size))
1134                 return -1;
1135
1136         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1137
1138         vq->inflight_split->desc[idx].inflight = 0;
1139
1140         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1141
1142         vq->inflight_split->used_idx = last_used_idx;
1143         return 0;
1144 }
1145
1146 int
1147 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1148                                    uint16_t head)
1149 {
1150         struct rte_vhost_inflight_info_packed *inflight_info;
1151         struct virtio_net *dev;
1152         struct vhost_virtqueue *vq;
1153
1154         dev = get_device(vid);
1155         if (unlikely(!dev))
1156                 return -1;
1157
1158         if (unlikely(!(dev->protocol_features &
1159             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1160                 return 0;
1161
1162         if (unlikely(!vq_is_packed(dev)))
1163                 return -1;
1164
1165         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1166                 return -1;
1167
1168         vq = dev->virtqueue[vring_idx];
1169         if (unlikely(!vq))
1170                 return -1;
1171
1172         inflight_info = vq->inflight_packed;
1173         if (unlikely(!inflight_info))
1174                 return -1;
1175
1176         if (unlikely(head >= vq->size))
1177                 return -1;
1178
1179         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1180
1181         inflight_info->desc[head].inflight = 0;
1182
1183         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1184
1185         inflight_info->old_free_head = inflight_info->free_head;
1186         inflight_info->old_used_idx = inflight_info->used_idx;
1187         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1188
1189         return 0;
1190 }
1191
1192 int
1193 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1194                                      uint16_t idx)
1195 {
1196         struct virtio_net *dev;
1197         struct vhost_virtqueue *vq;
1198
1199         dev = get_device(vid);
1200         if (unlikely(!dev))
1201                 return -1;
1202
1203         if (unlikely(!(dev->protocol_features &
1204             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1205                 return 0;
1206
1207         if (unlikely(vq_is_packed(dev)))
1208                 return -1;
1209
1210         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1211                 return -1;
1212
1213         vq = dev->virtqueue[vring_idx];
1214         if (unlikely(!vq))
1215                 return -1;
1216
1217         if (unlikely(!vq->inflight_split))
1218                 return -1;
1219
1220         if (unlikely(idx >= vq->size))
1221                 return -1;
1222
1223         vq->inflight_split->last_inflight_io = idx;
1224         return 0;
1225 }
1226
1227 int
1228 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1229                                       uint16_t head)
1230 {
1231         struct rte_vhost_inflight_info_packed *inflight_info;
1232         struct virtio_net *dev;
1233         struct vhost_virtqueue *vq;
1234         uint16_t last;
1235
1236         dev = get_device(vid);
1237         if (unlikely(!dev))
1238                 return -1;
1239
1240         if (unlikely(!(dev->protocol_features &
1241             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1242                 return 0;
1243
1244         if (unlikely(!vq_is_packed(dev)))
1245                 return -1;
1246
1247         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1248                 return -1;
1249
1250         vq = dev->virtqueue[vring_idx];
1251         if (unlikely(!vq))
1252                 return -1;
1253
1254         inflight_info = vq->inflight_packed;
1255         if (unlikely(!inflight_info))
1256                 return -1;
1257
1258         if (unlikely(head >= vq->size))
1259                 return -1;
1260
1261         last = inflight_info->desc[head].last;
1262         if (unlikely(last >= vq->size))
1263                 return -1;
1264
1265         inflight_info->desc[last].next = inflight_info->free_head;
1266         inflight_info->free_head = head;
1267         inflight_info->used_idx += inflight_info->desc[head].num;
1268         if (inflight_info->used_idx >= inflight_info->desc_num) {
1269                 inflight_info->used_idx -= inflight_info->desc_num;
1270                 inflight_info->used_wrap_counter =
1271                         !inflight_info->used_wrap_counter;
1272         }
1273
1274         return 0;
1275 }
1276
1277 int
1278 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1279 {
1280         struct virtio_net *dev;
1281         struct vhost_virtqueue *vq;
1282
1283         dev = get_device(vid);
1284         if (!dev)
1285                 return -1;
1286
1287         if (vring_idx >= VHOST_MAX_VRING)
1288                 return -1;
1289
1290         vq = dev->virtqueue[vring_idx];
1291         if (!vq)
1292                 return -1;
1293
1294         if (vq_is_packed(dev))
1295                 vhost_vring_call_packed(dev, vq);
1296         else
1297                 vhost_vring_call_split(dev, vq);
1298
1299         return 0;
1300 }
1301
1302 uint16_t
1303 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1304 {
1305         struct virtio_net *dev;
1306         struct vhost_virtqueue *vq;
1307         uint16_t ret = 0;
1308
1309         dev = get_device(vid);
1310         if (!dev)
1311                 return 0;
1312
1313         if (queue_id >= VHOST_MAX_VRING)
1314                 return 0;
1315
1316         vq = dev->virtqueue[queue_id];
1317         if (!vq)
1318                 return 0;
1319
1320         rte_spinlock_lock(&vq->access_lock);
1321
1322         if (unlikely(!vq->enabled || vq->avail == NULL))
1323                 goto out;
1324
1325         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1326
1327 out:
1328         rte_spinlock_unlock(&vq->access_lock);
1329         return ret;
1330 }
1331
1332 static inline int
1333 vhost_enable_notify_split(struct virtio_net *dev,
1334                 struct vhost_virtqueue *vq, int enable)
1335 {
1336         if (vq->used == NULL)
1337                 return -1;
1338
1339         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1340                 if (enable)
1341                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1342                 else
1343                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1344         } else {
1345                 if (enable)
1346                         vhost_avail_event(vq) = vq->last_avail_idx;
1347         }
1348         return 0;
1349 }
1350
1351 static inline int
1352 vhost_enable_notify_packed(struct virtio_net *dev,
1353                 struct vhost_virtqueue *vq, int enable)
1354 {
1355         uint16_t flags;
1356
1357         if (vq->device_event == NULL)
1358                 return -1;
1359
1360         if (!enable) {
1361                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1362                 return 0;
1363         }
1364
1365         flags = VRING_EVENT_F_ENABLE;
1366         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1367                 flags = VRING_EVENT_F_DESC;
1368                 vq->device_event->off_wrap = vq->last_avail_idx |
1369                         vq->avail_wrap_counter << 15;
1370         }
1371
1372         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1373
1374         vq->device_event->flags = flags;
1375         return 0;
1376 }
1377
1378 int
1379 vhost_enable_guest_notification(struct virtio_net *dev,
1380                 struct vhost_virtqueue *vq, int enable)
1381 {
1382         /*
1383          * If the virtqueue is not ready yet, the notification setting
1384          * will be applied when it becomes ready.
1385          */
1386         if (!vq->ready)
1387                 return 0;
1388
1389         if (vq_is_packed(dev))
1390                 return vhost_enable_notify_packed(dev, vq, enable);
1391         else
1392                 return vhost_enable_notify_split(dev, vq, enable);
1393 }
1394
1395 int
1396 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1397 {
1398         struct virtio_net *dev = get_device(vid);
1399         struct vhost_virtqueue *vq;
1400         int ret;
1401
1402         if (!dev)
1403                 return -1;
1404
1405         if (queue_id >= VHOST_MAX_VRING)
1406                 return -1;
1407
1408         vq = dev->virtqueue[queue_id];
1409         if (!vq)
1410                 return -1;
1411
1412         rte_spinlock_lock(&vq->access_lock);
1413
1414         vq->notif_enable = enable;
1415         ret = vhost_enable_guest_notification(dev, vq, enable);
1416
1417         rte_spinlock_unlock(&vq->access_lock);
1418
1419         return ret;
1420 }
1421
1422 void
1423 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1424 {
1425         struct virtio_net *dev = get_device(vid);
1426
1427         if (dev == NULL)
1428                 return;
1429
1430         vhost_log_write(dev, addr, len);
1431 }
1432
1433 void
1434 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1435                          uint64_t offset, uint64_t len)
1436 {
1437         struct virtio_net *dev;
1438         struct vhost_virtqueue *vq;
1439
1440         dev = get_device(vid);
1441         if (dev == NULL)
1442                 return;
1443
1444         if (vring_idx >= VHOST_MAX_VRING)
1445                 return;
1446         vq = dev->virtqueue[vring_idx];
1447         if (!vq)
1448                 return;
1449
1450         vhost_log_used_vring(dev, vq, offset, len);
1451 }
1452
1453 uint32_t
1454 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1455 {
1456         struct virtio_net *dev;
1457         struct vhost_virtqueue *vq;
1458         uint32_t ret = 0;
1459
1460         dev = get_device(vid);
1461         if (dev == NULL)
1462                 return 0;
1463
1464         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1465                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1466                         dev->ifname, __func__, qid);
1467                 return 0;
1468         }
1469
1470         vq = dev->virtqueue[qid];
1471         if (vq == NULL)
1472                 return 0;
1473
1474         rte_spinlock_lock(&vq->access_lock);
1475
1476         if (unlikely(!vq->enabled || vq->avail == NULL))
1477                 goto out;
1478
1479         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1480
1481 out:
1482         rte_spinlock_unlock(&vq->access_lock);
1483         return ret;
1484 }
1485
1486 struct rte_vdpa_device *
1487 rte_vhost_get_vdpa_device(int vid)
1488 {
1489         struct virtio_net *dev = get_device(vid);
1490
1491         if (dev == NULL)
1492                 return NULL;
1493
1494         return dev->vdpa_dev;
1495 }
1496
1497 int
1498 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1499                 uint64_t *log_size)
1500 {
1501         struct virtio_net *dev = get_device(vid);
1502
1503         if (dev == NULL || log_base == NULL || log_size == NULL)
1504                 return -1;
1505
1506         *log_base = dev->log_base;
1507         *log_size = dev->log_size;
1508
1509         return 0;
1510 }
1511
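/*
 * For packed rings, the avail/used wrap counter is encoded in bit 15 of
 * the returned last_avail_idx/last_used_idx, while the ring index uses
 * the lower 15 bits.  rte_vhost_set_vring_base() below expects the same
 * encoding.
 */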
1512 int
1513 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1514                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1515 {
1516         struct vhost_virtqueue *vq;
1517         struct virtio_net *dev = get_device(vid);
1518
1519         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1520                 return -1;
1521
1522         if (queue_id >= VHOST_MAX_VRING)
1523                 return -1;
1524
1525         vq = dev->virtqueue[queue_id];
1526         if (!vq)
1527                 return -1;
1528
1529         if (vq_is_packed(dev)) {
1530                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1531                                   vq->last_avail_idx;
1532                 *last_used_idx = (vq->used_wrap_counter << 15) |
1533                                  vq->last_used_idx;
1534         } else {
1535                 *last_avail_idx = vq->last_avail_idx;
1536                 *last_used_idx = vq->last_used_idx;
1537         }
1538
1539         return 0;
1540 }
1541
1542 int
1543 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1544                 uint16_t last_avail_idx, uint16_t last_used_idx)
1545 {
1546         struct vhost_virtqueue *vq;
1547         struct virtio_net *dev = get_device(vid);
1548
1549         if (!dev)
1550                 return -1;
1551
1552         if (queue_id >= VHOST_MAX_VRING)
1553                 return -1;
1554
1555         vq = dev->virtqueue[queue_id];
1556         if (!vq)
1557                 return -1;
1558
1559         if (vq_is_packed(dev)) {
1560                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1561                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1562                 vq->last_used_idx = last_used_idx & 0x7fff;
1563                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1564         } else {
1565                 vq->last_avail_idx = last_avail_idx;
1566                 vq->last_used_idx = last_used_idx;
1567         }
1568
1569         return 0;
1570 }
1571
1572 int
1573 rte_vhost_get_vring_base_from_inflight(int vid,
1574                                        uint16_t queue_id,
1575                                        uint16_t *last_avail_idx,
1576                                        uint16_t *last_used_idx)
1577 {
1578         struct rte_vhost_inflight_info_packed *inflight_info;
1579         struct vhost_virtqueue *vq;
1580         struct virtio_net *dev = get_device(vid);
1581
1582         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1583                 return -1;
1584
1585         if (queue_id >= VHOST_MAX_VRING)
1586                 return -1;
1587
1588         vq = dev->virtqueue[queue_id];
1589         if (!vq)
1590                 return -1;
1591
1592         if (!vq_is_packed(dev))
1593                 return -1;
1594
1595         inflight_info = vq->inflight_packed;
1596         if (!inflight_info)
1597                 return -1;
1598
1599         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1600                           inflight_info->old_used_idx;
1601         *last_used_idx = *last_avail_idx;
1602
1603         return 0;
1604 }
1605
1606 int
1607 rte_vhost_extern_callback_register(int vid,
1608                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1609 {
1610         struct virtio_net *dev = get_device(vid);
1611
1612         if (dev == NULL || ops == NULL)
1613                 return -1;
1614
1615         dev->extern_ops = *ops;
1616         dev->extern_data = ctx;
1617         return 0;
1618 }
1619
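/*
 * Allocate the per-virtqueue async metadata (packet info, completion
 * flags and the shadow used entries).  vq->access_lock must be held:
 * rte_vhost_async_channel_register() takes it, while the
 * *_thread_unsafe variant leaves locking to the application.
 */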
1620 static __rte_always_inline int
1621 async_channel_register(int vid, uint16_t queue_id)
1622 {
1623         struct virtio_net *dev = get_device(vid);
1624         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1625         struct vhost_async *async;
1626         int node = vq->numa_node;
1627
1628         if (unlikely(vq->async)) {
1629                 VHOST_LOG_CONFIG(ERR,
1630                                 "(%s) async register failed: already registered (qid: %d)\n",
1631                                 dev->ifname, queue_id);
1632                 return -1;
1633         }
1634
1635         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1636         if (!async) {
1637                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1638                                 dev->ifname, queue_id);
1639                 return -1;
1640         }
1641
1642         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1643                         RTE_CACHE_LINE_SIZE, node);
1644         if (!async->pkts_info) {
1645                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1646                                 dev->ifname, queue_id);
1647                 goto out_free_async;
1648         }
1649
1650         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1651                         RTE_CACHE_LINE_SIZE, node);
1652         if (!async->pkts_cmpl_flag) {
1653                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1654                                 dev->ifname, queue_id);
1655                 goto out_free_async;
1656         }
1657
1658         if (vq_is_packed(dev)) {
1659                 async->buffers_packed = rte_malloc_socket(NULL,
1660                                 vq->size * sizeof(struct vring_used_elem_packed),
1661                                 RTE_CACHE_LINE_SIZE, node);
1662                 if (!async->buffers_packed) {
1663                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1664                                         dev->ifname, queue_id);
1665                         goto out_free_inflight;
1666                 }
1667         } else {
1668                 async->descs_split = rte_malloc_socket(NULL,
1669                                 vq->size * sizeof(struct vring_used_elem),
1670                                 RTE_CACHE_LINE_SIZE, node);
1671                 if (!async->descs_split) {
1672                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1673                                         dev->ifname, queue_id);
1674                         goto out_free_inflight;
1675                 }
1676         }
1677
1678         vq->async = async;
1679
1680         return 0;
1681 out_free_inflight:
1682         rte_free(async->pkts_info);
1683 out_free_async:
1684         rte_free(async);
1685
1686         return -1;
1687 }
1688
1689 int
1690 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1691 {
1692         struct vhost_virtqueue *vq;
1693         struct virtio_net *dev = get_device(vid);
1694         int ret;
1695
1696         if (dev == NULL)
1697                 return -1;
1698
1699         if (queue_id >= VHOST_MAX_VRING)
1700                 return -1;
1701
1702         vq = dev->virtqueue[queue_id];
1703
1704         if (unlikely(vq == NULL || !dev->async_copy))
1705                 return -1;
1706
1707         rte_spinlock_lock(&vq->access_lock);
1708         ret = async_channel_register(vid, queue_id);
1709         rte_spinlock_unlock(&vq->access_lock);
1710
1711         return ret;
1712 }
1713
1714 int
1715 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1716 {
1717         struct vhost_virtqueue *vq;
1718         struct virtio_net *dev = get_device(vid);
1719
1720         if (dev == NULL)
1721                 return -1;
1722
1723         if (queue_id >= VHOST_MAX_VRING)
1724                 return -1;
1725
1726         vq = dev->virtqueue[queue_id];
1727
1728         if (unlikely(vq == NULL || !dev->async_copy))
1729                 return -1;
1730
1731         return async_channel_register(vid, queue_id);
1732 }
1733
1734 int
1735 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1736 {
1737         struct vhost_virtqueue *vq;
1738         struct virtio_net *dev = get_device(vid);
1739         int ret = -1;
1740
1741         if (dev == NULL)
1742                 return ret;
1743
1744         if (queue_id >= VHOST_MAX_VRING)
1745                 return ret;
1746
1747         vq = dev->virtqueue[queue_id];
1748
1749         if (vq == NULL)
1750                 return ret;
1751
1752         ret = 0;
1753
1754         if (!vq->async)
1755                 return ret;
1756
1757         if (!rte_spinlock_trylock(&vq->access_lock)) {
1758                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1759                                 dev->ifname);
1760                 return -1;
1761         }
1762
1763         if (vq->async->pkts_inflight_n) {
1764                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1765                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1766                         dev->ifname);
1767                 ret = -1;
1768                 goto out;
1769         }
1770
1771         vhost_free_async_mem(vq);
1772 out:
1773         rte_spinlock_unlock(&vq->access_lock);
1774
1775         return ret;
1776 }
1777
1778 int
1779 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1780 {
1781         struct vhost_virtqueue *vq;
1782         struct virtio_net *dev = get_device(vid);
1783
1784         if (dev == NULL)
1785                 return -1;
1786
1787         if (queue_id >= VHOST_MAX_VRING)
1788                 return -1;
1789
1790         vq = dev->virtqueue[queue_id];
1791
1792         if (vq == NULL)
1793                 return -1;
1794
1795         if (!vq->async)
1796                 return 0;
1797
1798         if (vq->async->pkts_inflight_n) {
1799                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1800                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1801                         dev->ifname);
1802                 return -1;
1803         }
1804
1805         vhost_free_async_mem(vq);
1806
1807         return 0;
1808 }
1809
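/*
 * Typical call order, as a sketch only (dma_id, vchan_id, vid, queue_id
 * and handle_error() are application-side placeholders): configure the
 * DMA vchannel once, then register the async channel per virtqueue.
 *
 *	if (rte_vhost_async_dma_configure(dma_id, vchan_id) < 0)
 *		handle_error();
 *	if (rte_vhost_async_channel_register(vid, queue_id) < 0)
 *		handle_error();
 */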
1810 int
1811 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1812 {
1813         struct rte_dma_info info;
1814         void *pkts_cmpl_flag_addr;
1815         uint16_t max_desc;
1816
1817         if (!rte_dma_is_valid(dma_id)) {
1818                 VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
1819                 return -1;
1820         }
1821
1822         rte_dma_info_get(dma_id, &info);
1823         if (vchan_id >= info.max_vchans) {
1824                 VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1825                 return -1;
1826         }
1827
1828         if (!dma_copy_track[dma_id].vchans) {
1829                 struct async_dma_vchan_info *vchans;
1830
1831                 vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1832                                 RTE_CACHE_LINE_SIZE);
1833                 if (vchans == NULL) {
1834                         VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
1835                                         dma_id, vchan_id);
1836                         return -1;
1837                 }
1838
1839                 dma_copy_track[dma_id].vchans = vchans;
1840         }
1841
1842         if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1843                 VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
1844                                 vchan_id);
1845                 return 0;
1846         }
1847
1848         max_desc = info.max_desc;
1849         if (!rte_is_power_of_2(max_desc))
1850                 max_desc = rte_align32pow2(max_desc);
1851
1852         pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1853         if (!pkts_cmpl_flag_addr) {
1854                 VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
1855                                 "vChannel %u.\n", dma_id, vchan_id);
1856
1857                 if (dma_copy_track[dma_id].nr_vchans == 0) {
1858                         rte_free(dma_copy_track[dma_id].vchans);
1859                         dma_copy_track[dma_id].vchans = NULL;
1860                 }
1861                 return -1;
1862         }
1863
1864         dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1865         dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1866         dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1867         dma_copy_track[dma_id].nr_vchans++;
1868
1869         return 0;
1870 }
1871
1872 int
1873 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1874 {
1875         struct vhost_virtqueue *vq;
1876         struct virtio_net *dev = get_device(vid);
1877         int ret = -1;
1878
1879         if (dev == NULL)
1880                 return ret;
1881
1882         if (queue_id >= VHOST_MAX_VRING)
1883                 return ret;
1884
1885         vq = dev->virtqueue[queue_id];
1886
1887         if (vq == NULL)
1888                 return ret;
1889
1890         if (!vq->async)
1891                 return ret;
1892
1893         if (!rte_spinlock_trylock(&vq->access_lock)) {
1894                 VHOST_LOG_CONFIG(DEBUG,
1895                         "(%s) failed to check in-flight packets. virtqueue busy.\n",
1896                         dev->ifname);
1897                 return ret;
1898         }
1899
1900         ret = vq->async->pkts_inflight_n;
1901         rte_spinlock_unlock(&vq->access_lock);
1902
1903         return ret;
1904 }
1905
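/*
 * Fill the power monitoring condition for the given virtqueue: for
 * packed rings, wait on the flags of the next descriptor until its
 * AVAIL/USED bits signal a new entry; for split rings, wait until
 * avail->idx differs from the last index seen by the backend.
 */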
1906 int
1907 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1908                 struct rte_vhost_power_monitor_cond *pmc)
1909 {
1910         struct virtio_net *dev = get_device(vid);
1911         struct vhost_virtqueue *vq;
1912
1913         if (dev == NULL)
1914                 return -1;
1915         if (queue_id >= VHOST_MAX_VRING)
1916                 return -1;
1917
1918         vq = dev->virtqueue[queue_id];
1919         if (vq == NULL)
1920                 return -1;
1921
1922         if (vq_is_packed(dev)) {
1923                 struct vring_packed_desc *desc;
1924                 desc = vq->desc_packed;
1925                 pmc->addr = &desc[vq->last_avail_idx].flags;
1926                 if (vq->avail_wrap_counter)
1927                         pmc->val = VRING_DESC_F_AVAIL;
1928                 else
1929                         pmc->val = VRING_DESC_F_USED;
1930                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1931                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1932                 pmc->match = 1;
1933         } else {
1934                 pmc->addr = &vq->avail->idx;
1935                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1936                 pmc->mask = vq->size - 1;
1937                 pmc->size = sizeof(vq->avail->idx);
1938                 pmc->match = 0;
1939         }
1940
1941         return 0;
1942 }
1943
1944 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1945 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);