lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
28 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
62                                 dev->ifname, iova);
63                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
64                 }
65
66                 vhost_user_iotlb_rd_lock(vq);
67         }
68
69         return 0;
70 }
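
/*
 * Illustrative sketch (not part of this file): data-path code normally goes
 * through the vhost_iova_to_vva() inline wrapper from vhost.h with the
 * iotlb_lock read-locked, and simply gives up on a miss because the miss
 * request has already been sent above; the caller retries after the next
 * IOTLB update, e.g.:
 *
 *     uint64_t len = sizeof(struct vring_desc);
 *     uint64_t vva = vhost_iova_to_vva(dev, vq, desc_iova, &len,
 *                                      VHOST_ACCESS_RO);
 *     if (!vva || len < sizeof(struct vring_desc))
 *             return -1;   (retry on the next IOTLB update)
 */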
71
72 #define VHOST_LOG_PAGE  4096
73
74 /*
75  * Atomically set a bit in memory.
76  */
77 static __rte_always_inline void
78 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
79 {
80 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
81         /*
82          * The '__sync_' built-ins are deprecated, but the '__atomic_' ones
83          * are poorly optimized in older GCC versions.
84          */
85         __sync_fetch_and_or_1(addr, (1U << nr));
86 #else
87         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
88 #endif
89 }
90
91 static __rte_always_inline void
92 vhost_log_page(uint8_t *log_base, uint64_t page)
93 {
94         vhost_set_bit(page % 8, &log_base[page / 8]);
95 }
96
97 void
98 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
99 {
100         uint64_t page;
101
102         if (unlikely(!dev->log_base || !len))
103                 return;
104
105         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
106                 return;
107
108         /* To make sure guest memory updates are committed before logging */
109         rte_atomic_thread_fence(__ATOMIC_RELEASE);
110
111         page = addr / VHOST_LOG_PAGE;
112         while (page * VHOST_LOG_PAGE < addr + len) {
113                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
114                 page += 1;
115         }
116 }
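
/*
 * Worked example (illustrative only): with VHOST_LOG_PAGE = 4096, a write of
 * len 0x2000 at GPA 0x2003 touches pages 2, 3 and 4, so bits 2, 3 and 4 of
 * log_base[0] get set:
 *
 *     page = 0x2003 / 4096 = 2
 *     loop while page * 4096 < 0x2003 + 0x2000 (= 0x4003)  ->  pages 2..4
 *     vhost_log_page() sets bit (page % 8) in byte (page / 8)
 */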
117
118 void
119 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
120                              uint64_t iova, uint64_t len)
121 {
122         uint64_t hva, gpa, map_len;
123         map_len = len;
124
125         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
126         if (map_len != len) {
127                 VHOST_LOG_DATA(ERR,
128                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
129                         dev->ifname, iova);
130                 return;
131         }
132
133         gpa = hva_to_gpa(dev, hva, len);
134         if (gpa)
135                 __vhost_log_write(dev, gpa, len);
136 }
137
138 void
139 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
140 {
141         unsigned long *log_base;
142         int i;
143
144         if (unlikely(!dev->log_base))
145                 return;
146
147         /* No cache, nothing to sync */
148         if (unlikely(!vq->log_cache))
149                 return;
150
151         rte_atomic_thread_fence(__ATOMIC_RELEASE);
152
153         log_base = (unsigned long *)(uintptr_t)dev->log_base;
154
155         for (i = 0; i < vq->log_cache_nb_elem; i++) {
156                 struct log_cache_entry *elem = vq->log_cache + i;
157
158 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
159                 /*
160                  * The '__sync' builtins are deprecated, but the '__atomic'
161                  * ones are poorly optimized in older GCC versions.
162                  */
163                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
164 #else
165                 __atomic_fetch_or(log_base + elem->offset, elem->val,
166                                 __ATOMIC_RELAXED);
167 #endif
168         }
169
170         rte_atomic_thread_fence(__ATOMIC_RELEASE);
171
172         vq->log_cache_nb_elem = 0;
173 }
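
/*
 * Note (illustrative): the per-virtqueue log cache batches dirty bits as
 * (offset, val) pairs in vq->log_cache[], where "offset" indexes the log
 * bitmap in unsigned-long units. A typical enqueue burst fills the cache
 * repeatedly and flushes it once, roughly:
 *
 *     vhost_log_cache_used_vring(dev, vq, off, len);   (fills the cache)
 *     ...
 *     vhost_log_cache_sync(dev, vq);                   (single flush)
 *
 * (Both helpers are inline wrappers declared in vhost.h.)
 */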
174
175 static __rte_always_inline void
176 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
177                         uint64_t page)
178 {
179         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
180         uint32_t offset = page / (sizeof(unsigned long) << 3);
181         int i;
182
183         if (unlikely(!vq->log_cache)) {
184                 /* No logging cache allocated, write dirty log map directly */
185                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
186                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
187
188                 return;
189         }
190
191         for (i = 0; i < vq->log_cache_nb_elem; i++) {
192                 struct log_cache_entry *elem = vq->log_cache + i;
193
194                 if (elem->offset == offset) {
195                         elem->val |= (1UL << bit_nr);
196                         return;
197                 }
198         }
199
200         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
201                 /*
202                  * No more room for a new log cache entry,
203                  * so write the dirty log map directly.
204                  */
205                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
206                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
207
208                 return;
209         }
210
211         vq->log_cache[i].offset = offset;
212         vq->log_cache[i].val = (1UL << bit_nr);
213         vq->log_cache_nb_elem++;
214 }
215
216 void
217 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
218                         uint64_t addr, uint64_t len)
219 {
220         uint64_t page;
221
222         if (unlikely(!dev->log_base || !len))
223                 return;
224
225         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
226                 return;
227
228         page = addr / VHOST_LOG_PAGE;
229         while (page * VHOST_LOG_PAGE < addr + len) {
230                 vhost_log_cache_page(dev, vq, page);
231                 page += 1;
232         }
233 }
234
235 void
236 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
237                              uint64_t iova, uint64_t len)
238 {
239         uint64_t hva, gpa, map_len;
240         map_len = len;
241
242         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
243         if (map_len != len) {
244                 VHOST_LOG_DATA(ERR,
245                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
246                         dev->ifname, iova);
247                 return;
248         }
249
250         gpa = hva_to_gpa(dev, hva, len);
251         if (gpa)
252                 __vhost_log_cache_write(dev, vq, gpa, len);
253 }
254
255 void *
256 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
257                 uint64_t desc_addr, uint64_t desc_len)
258 {
259         void *idesc;
260         uint64_t src, dst;
261         uint64_t len, remain = desc_len;
262
263         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
264         if (unlikely(!idesc))
265                 return NULL;
266
267         dst = (uint64_t)(uintptr_t)idesc;
268
269         while (remain) {
270                 len = remain;
271                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
272                                 VHOST_ACCESS_RO);
273                 if (unlikely(!src || !len)) {
274                         rte_free(idesc);
275                         return NULL;
276                 }
277
278                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
279
280                 remain -= len;
281                 dst += len;
282                 desc_addr += len;
283         }
284
285         return idesc;
286 }
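
/*
 * Usage sketch (illustrative): callers use this to obtain a linear copy of
 * an indirect descriptor table that may span several IOTLB entries, then
 * release it with the free_ind_table() wrapper from vhost.h:
 *
 *     struct vring_desc *idesc;
 *
 *     idesc = vhost_alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
 *     if (unlikely(!idesc))
 *             return -1;
 *     ... walk idesc[0 .. desc->len / sizeof(struct vring_desc) - 1] ...
 *     free_ind_table(idesc);
 */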
287
288 void
289 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
290 {
291         if ((vq->callfd >= 0) && (destroy != 0))
292                 close(vq->callfd);
293         if (vq->kickfd >= 0)
294                 close(vq->kickfd);
295 }
296
297 void
298 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
299 {
300         if (!(dev->protocol_features &
301             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
302                 return;
303
304         if (vq_is_packed(dev)) {
305                 if (vq->inflight_packed)
306                         vq->inflight_packed = NULL;
307         } else {
308                 if (vq->inflight_split)
309                         vq->inflight_split = NULL;
310         }
311
312         if (vq->resubmit_inflight) {
313                 if (vq->resubmit_inflight->resubmit_list) {
314                         rte_free(vq->resubmit_inflight->resubmit_list);
315                         vq->resubmit_inflight->resubmit_list = NULL;
316                 }
317                 rte_free(vq->resubmit_inflight);
318                 vq->resubmit_inflight = NULL;
319         }
320 }
321
322 /*
323  * Unmap any memory, close any file descriptors and
324  * free any memory owned by a device.
325  */
326 void
327 cleanup_device(struct virtio_net *dev, int destroy)
328 {
329         uint32_t i;
330
331         vhost_backend_cleanup(dev);
332
333         for (i = 0; i < dev->nr_vring; i++) {
334                 cleanup_vq(dev->virtqueue[i], destroy);
335                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
336         }
337 }
338
339 static void
340 vhost_free_async_mem(struct vhost_virtqueue *vq)
341 {
342         if (!vq->async)
343                 return;
344
345         rte_free(vq->async->pkts_info);
346         rte_free(vq->async->pkts_cmpl_flag);
347
348         rte_free(vq->async->buffers_packed);
349         vq->async->buffers_packed = NULL;
350         rte_free(vq->async->descs_split);
351         vq->async->descs_split = NULL;
352
353         rte_free(vq->async);
354         vq->async = NULL;
355 }
356
357 void
358 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
359 {
360         if (vq_is_packed(dev))
361                 rte_free(vq->shadow_used_packed);
362         else
363                 rte_free(vq->shadow_used_split);
364
365         vhost_free_async_mem(vq);
366         rte_free(vq->batch_copy_elems);
367         rte_mempool_free(vq->iotlb_pool);
368         rte_free(vq->log_cache);
369         rte_free(vq);
370 }
371
372 /*
373  * Release virtqueues and device memory.
374  */
375 static void
376 free_device(struct virtio_net *dev)
377 {
378         uint32_t i;
379
380         for (i = 0; i < dev->nr_vring; i++)
381                 free_vq(dev, dev->virtqueue[i]);
382
383         rte_free(dev);
384 }
385
386 static __rte_always_inline int
387 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
388 {
389         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
390                 return 0;
391
392         vq->log_guest_addr = translate_log_addr(dev, vq,
393                                                 vq->ring_addrs.log_guest_addr);
394         if (vq->log_guest_addr == 0)
395                 return -1;
396
397         return 0;
398 }
399
400 /*
401  * Converts a vring log address to a GPA.
402  * If IOMMU is enabled, the log address is an IOVA;
403  * if IOMMU is not enabled, the log address is already a GPA.
404  *
405  * Caller should have iotlb_lock read-locked
406  */
407 uint64_t
408 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
409                 uint64_t log_addr)
410 {
411         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
412                 const uint64_t exp_size = sizeof(uint64_t);
413                 uint64_t hva, gpa;
414                 uint64_t size = exp_size;
415
416                 hva = vhost_iova_to_vva(dev, vq, log_addr,
417                                         &size, VHOST_ACCESS_RW);
418
419                 if (size != exp_size)
420                         return 0;
421
422                 gpa = hva_to_gpa(dev, hva, exp_size);
423                 if (!gpa) {
424                         VHOST_LOG_DATA(ERR,
425                                 "(%s) failed to find GPA for log_addr: 0x%"
426                                 PRIx64 " hva: 0x%" PRIx64 "\n",
427                                 dev->ifname, log_addr, hva);
428                         return 0;
429                 }
430                 return gpa;
431
432         } else
433                 return log_addr;
434 }
435
436 /* Caller should have iotlb_lock read-locked */
437 static int
438 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
439 {
440         uint64_t req_size, size;
441
442         req_size = sizeof(struct vring_desc) * vq->size;
443         size = req_size;
444         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
445                                                 vq->ring_addrs.desc_user_addr,
446                                                 &size, VHOST_ACCESS_RW);
447         if (!vq->desc || size != req_size)
448                 return -1;
449
450         req_size = sizeof(struct vring_avail);
451         req_size += sizeof(uint16_t) * vq->size;
452         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
453                 req_size += sizeof(uint16_t);
454         size = req_size;
455         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
456                                                 vq->ring_addrs.avail_user_addr,
457                                                 &size, VHOST_ACCESS_RW);
458         if (!vq->avail || size != req_size)
459                 return -1;
460
461         req_size = sizeof(struct vring_used);
462         req_size += sizeof(struct vring_used_elem) * vq->size;
463         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
464                 req_size += sizeof(uint16_t);
465         size = req_size;
466         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
467                                                 vq->ring_addrs.used_user_addr,
468                                                 &size, VHOST_ACCESS_RW);
469         if (!vq->used || size != req_size)
470                 return -1;
471
472         return 0;
473 }
474
475 /* Caller should have iotlb_lock read-locked */
476 static int
477 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
478 {
479         uint64_t req_size, size;
480
481         req_size = sizeof(struct vring_packed_desc) * vq->size;
482         size = req_size;
483         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
484                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
485                                 &size, VHOST_ACCESS_RW);
486         if (!vq->desc_packed || size != req_size)
487                 return -1;
488
489         req_size = sizeof(struct vring_packed_desc_event);
490         size = req_size;
491         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
492                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
493                                 &size, VHOST_ACCESS_RW);
494         if (!vq->driver_event || size != req_size)
495                 return -1;
496
497         req_size = sizeof(struct vring_packed_desc_event);
498         size = req_size;
499         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
500                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
501                                 &size, VHOST_ACCESS_RW);
502         if (!vq->device_event || size != req_size)
503                 return -1;
504
505         return 0;
506 }
507
508 int
509 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
510 {
511
512         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
513                 return -1;
514
515         if (vq_is_packed(dev)) {
516                 if (vring_translate_packed(dev, vq) < 0)
517                         return -1;
518         } else {
519                 if (vring_translate_split(dev, vq) < 0)
520                         return -1;
521         }
522
523         if (log_translate(dev, vq) < 0)
524                 return -1;
525
526         vq->access_ok = true;
527
528         return 0;
529 }
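
/*
 * Illustrative sketch: when ring addresses become stale (for instance after
 * a memory table update), access_ok is cleared via vring_invalidate() and
 * the ring is re-translated later, with the IOTLB read lock held:
 *
 *     vring_invalidate(dev, vq);
 *     ...
 *     vhost_user_iotlb_rd_lock(vq);
 *     if (vring_translate(dev, vq) < 0)
 *             ... ring not accessible yet, retry on next IOTLB update ...
 *     vhost_user_iotlb_rd_unlock(vq);
 */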
530
531 void
532 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
533 {
534         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
535                 vhost_user_iotlb_wr_lock(vq);
536
537         vq->access_ok = false;
538         vq->desc = NULL;
539         vq->avail = NULL;
540         vq->used = NULL;
541         vq->log_guest_addr = 0;
542
543         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
544                 vhost_user_iotlb_wr_unlock(vq);
545 }
546
547 static void
548 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
549 {
550         struct vhost_virtqueue *vq;
551         int numa_node = SOCKET_ID_ANY;
552
553         if (vring_idx >= VHOST_MAX_VRING) {
554                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
555                                 dev->ifname, vring_idx);
556                 return;
557         }
558
559         vq = dev->virtqueue[vring_idx];
560         if (!vq) {
561                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
562                                 dev->ifname, vring_idx);
563                 return;
564         }
565
566         memset(vq, 0, sizeof(struct vhost_virtqueue));
567
568         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
569         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
570         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
571
572 #ifdef RTE_LIBRTE_VHOST_NUMA
573         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
574                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
575                         dev->ifname, rte_strerror(errno));
576                 numa_node = SOCKET_ID_ANY;
577         }
578 #endif
579         vq->numa_node = numa_node;
580
581         vhost_user_iotlb_init(dev, vring_idx);
582 }
583
584 static void
585 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
586 {
587         struct vhost_virtqueue *vq;
588         int callfd;
589
590         if (vring_idx >= VHOST_MAX_VRING) {
591                 VHOST_LOG_CONFIG(ERR,
592                                 "(%s) failed to reset vring, out of bound (%d)\n",
593                                 dev->ifname, vring_idx);
594                 return;
595         }
596
597         vq = dev->virtqueue[vring_idx];
598         if (!vq) {
599                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
600                                 dev->ifname, vring_idx);
601                 return;
602         }
603
604         callfd = vq->callfd;
605         init_vring_queue(dev, vring_idx);
606         vq->callfd = callfd;
607 }
608
609 int
610 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
611 {
612         struct vhost_virtqueue *vq;
613         uint32_t i;
614
615         /* Also allocate any holes, if present, up to the requested vring index. */
616         for (i = 0; i <= vring_idx; i++) {
617                 if (dev->virtqueue[i])
618                         continue;
619
620                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
621                 if (vq == NULL) {
622                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
623                                         dev->ifname, i);
624                         return -1;
625                 }
626
627                 dev->virtqueue[i] = vq;
628                 init_vring_queue(dev, i);
629                 rte_spinlock_init(&vq->access_lock);
630                 vq->avail_wrap_counter = 1;
631                 vq->used_wrap_counter = 1;
632                 vq->signalled_used_valid = false;
633         }
634
635         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
636
637         return 0;
638 }
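
/*
 * Example (illustrative): if only vring 3 has been requested so far,
 * alloc_vring_queue(dev, 3) allocates and initializes virtqueues 0..3
 * (filling the holes) and sets dev->nr_vring to 4.
 */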
639
640 /*
641  * Reset some variables in the device structure, while keeping a few
642  * others untouched, such as vid, ifname and nr_vring: they
643  * should remain the same unless the device is removed.
644  */
645 void
646 reset_device(struct virtio_net *dev)
647 {
648         uint32_t i;
649
650         dev->features = 0;
651         dev->protocol_features = 0;
652         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
653
654         for (i = 0; i < dev->nr_vring; i++)
655                 reset_vring_queue(dev, i);
656 }
657
658 /*
659  * Invoked when a new vhost-user connection is established (i.e. when
660  * a new virtio device is being attached).
661  */
662 int
663 vhost_new_device(void)
664 {
665         struct virtio_net *dev;
666         int i;
667
668         pthread_mutex_lock(&vhost_dev_lock);
669         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
670                 if (vhost_devices[i] == NULL)
671                         break;
672         }
673
674         if (i == RTE_MAX_VHOST_DEVICE) {
675                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
676                 pthread_mutex_unlock(&vhost_dev_lock);
677                 return -1;
678         }
679
680         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
681         if (dev == NULL) {
682                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
683                 pthread_mutex_unlock(&vhost_dev_lock);
684                 return -1;
685         }
686
687         vhost_devices[i] = dev;
688         pthread_mutex_unlock(&vhost_dev_lock);
689
690         dev->vid = i;
691         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
692         dev->slave_req_fd = -1;
693         dev->postcopy_ufd = -1;
694         rte_spinlock_init(&dev->slave_req_lock);
695
696         return i;
697 }
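
/*
 * Usage sketch (illustrative; normally driven by the vhost-user transport
 * rather than called directly by applications):
 *
 *     int vid = vhost_new_device();
 *     if (vid < 0)
 *             return -1;
 *     dev = get_device(vid);          (slot vhost_devices[vid] is now owned)
 *     ...
 *     vhost_destroy_device(vid);      (on disconnect)
 */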
698
699 void
700 vhost_destroy_device_notify(struct virtio_net *dev)
701 {
702         struct rte_vdpa_device *vdpa_dev;
703
704         if (dev->flags & VIRTIO_DEV_RUNNING) {
705                 vdpa_dev = dev->vdpa_dev;
706                 if (vdpa_dev)
707                         vdpa_dev->ops->dev_close(dev->vid);
708                 dev->flags &= ~VIRTIO_DEV_RUNNING;
709                 dev->notify_ops->destroy_device(dev->vid);
710         }
711 }
712
713 /*
714  * Invoked when the vhost-user connection is broken (i.e. when
715  * the virtio device is being detached).
716  */
717 void
718 vhost_destroy_device(int vid)
719 {
720         struct virtio_net *dev = get_device(vid);
721
722         if (dev == NULL)
723                 return;
724
725         vhost_destroy_device_notify(dev);
726
727         cleanup_device(dev, 1);
728         free_device(dev);
729
730         vhost_devices[vid] = NULL;
731 }
732
733 void
734 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
735 {
736         struct virtio_net *dev = get_device(vid);
737
738         if (dev == NULL)
739                 return;
740
741         dev->vdpa_dev = vdpa_dev;
742 }
743
744 void
745 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
746 {
747         struct virtio_net *dev;
748         unsigned int len;
749
750         dev = get_device(vid);
751         if (dev == NULL)
752                 return;
753
754         len = if_len > sizeof(dev->ifname) ?
755                 sizeof(dev->ifname) : if_len;
756
757         strncpy(dev->ifname, if_name, len);
758         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
759 }
760
761 void
762 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
763 {
764         struct virtio_net *dev = get_device(vid);
765
766         if (dev == NULL)
767                 return;
768
769         if (enable)
770                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
771         else
772                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
773         if (!compliant_ol_flags)
774                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
775         else
776                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
777 }
778
779 void
780 vhost_enable_extbuf(int vid)
781 {
782         struct virtio_net *dev = get_device(vid);
783
784         if (dev == NULL)
785                 return;
786
787         dev->extbuf = 1;
788 }
789
790 void
791 vhost_enable_linearbuf(int vid)
792 {
793         struct virtio_net *dev = get_device(vid);
794
795         if (dev == NULL)
796                 return;
797
798         dev->linearbuf = 1;
799 }
800
801 int
802 rte_vhost_get_mtu(int vid, uint16_t *mtu)
803 {
804         struct virtio_net *dev = get_device(vid);
805
806         if (dev == NULL || mtu == NULL)
807                 return -ENODEV;
808
809         if (!(dev->flags & VIRTIO_DEV_READY))
810                 return -EAGAIN;
811
812         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
813                 return -ENOTSUP;
814
815         *mtu = dev->mtu;
816
817         return 0;
818 }
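
/*
 * Application-level example (illustrative): the MTU is only valid once the
 * device is ready and VIRTIO_NET_F_MTU has been negotiated:
 *
 *     uint16_t mtu;
 *     int ret = rte_vhost_get_mtu(vid, &mtu);
 *
 *     if (ret == -EAGAIN)
 *             ... device not ready yet, try again later ...
 *     else if (ret == -ENOTSUP)
 *             ... guest did not negotiate VIRTIO_NET_F_MTU ...
 *     else if (ret == 0)
 *             ... use mtu ...
 */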
819
820 int
821 rte_vhost_get_numa_node(int vid)
822 {
823 #ifdef RTE_LIBRTE_VHOST_NUMA
824         struct virtio_net *dev = get_device(vid);
825         int numa_node;
826         int ret;
827
828         if (dev == NULL || numa_available() != 0)
829                 return -1;
830
831         ret = get_mempolicy(&numa_node, NULL, 0, dev,
832                             MPOL_F_NODE | MPOL_F_ADDR);
833         if (ret < 0) {
834                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
835                         dev->ifname, rte_strerror(errno));
836                 return -1;
837         }
838
839         return numa_node;
840 #else
841         RTE_SET_USED(vid);
842         return -1;
843 #endif
844 }
845
846 uint32_t
847 rte_vhost_get_queue_num(int vid)
848 {
849         struct virtio_net *dev = get_device(vid);
850
851         if (dev == NULL)
852                 return 0;
853
854         return dev->nr_vring / 2;
855 }
856
857 uint16_t
858 rte_vhost_get_vring_num(int vid)
859 {
860         struct virtio_net *dev = get_device(vid);
861
862         if (dev == NULL)
863                 return 0;
864
865         return dev->nr_vring;
866 }
867
868 int
869 rte_vhost_get_ifname(int vid, char *buf, size_t len)
870 {
871         struct virtio_net *dev = get_device(vid);
872
873         if (dev == NULL || buf == NULL)
874                 return -1;
875
876         len = RTE_MIN(len, sizeof(dev->ifname));
877
878         strncpy(buf, dev->ifname, len);
879         buf[len - 1] = '\0';
880
881         return 0;
882 }
883
884 int
885 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
886 {
887         struct virtio_net *dev;
888
889         dev = get_device(vid);
890         if (dev == NULL || features == NULL)
891                 return -1;
892
893         *features = dev->features;
894         return 0;
895 }
896
897 int
898 rte_vhost_get_negotiated_protocol_features(int vid,
899                                            uint64_t *protocol_features)
900 {
901         struct virtio_net *dev;
902
903         dev = get_device(vid);
904         if (dev == NULL || protocol_features == NULL)
905                 return -1;
906
907         *protocol_features = dev->protocol_features;
908         return 0;
909 }
910
911 int
912 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
913 {
914         struct virtio_net *dev;
915         struct rte_vhost_memory *m;
916         size_t size;
917
918         dev = get_device(vid);
919         if (dev == NULL || mem == NULL)
920                 return -1;
921
922         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
923         m = malloc(sizeof(struct rte_vhost_memory) + size);
924         if (!m)
925                 return -1;
926
927         m->nregions = dev->mem->nregions;
928         memcpy(m->regions, dev->mem->regions, size);
929         *mem = m;
930
931         return 0;
932 }
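
/*
 * Usage example (illustrative): the returned table is allocated with
 * malloc(), so the caller owns it and releases it with free():
 *
 *     struct rte_vhost_memory *mem = NULL;
 *     uint32_t i;
 *
 *     if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *             for (i = 0; i < mem->nregions; i++)
 *                     ... inspect mem->regions[i].guest_phys_addr etc. ...
 *             free(mem);
 *     }
 */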
933
934 int
935 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
936                           struct rte_vhost_vring *vring)
937 {
938         struct virtio_net *dev;
939         struct vhost_virtqueue *vq;
940
941         dev = get_device(vid);
942         if (dev == NULL || vring == NULL)
943                 return -1;
944
945         if (vring_idx >= VHOST_MAX_VRING)
946                 return -1;
947
948         vq = dev->virtqueue[vring_idx];
949         if (!vq)
950                 return -1;
951
952         if (vq_is_packed(dev)) {
953                 vring->desc_packed = vq->desc_packed;
954                 vring->driver_event = vq->driver_event;
955                 vring->device_event = vq->device_event;
956         } else {
957                 vring->desc = vq->desc;
958                 vring->avail = vq->avail;
959                 vring->used = vq->used;
960         }
961         vring->log_guest_addr  = vq->log_guest_addr;
962
963         vring->callfd  = vq->callfd;
964         vring->kickfd  = vq->kickfd;
965         vring->size    = vq->size;
966
967         return 0;
968 }
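
/*
 * Usage sketch (illustrative), e.g. from a vDPA driver setting up a queue:
 *
 *     struct rte_vhost_vring vring;
 *
 *     if (rte_vhost_get_vhost_vring(vid, qid, &vring) != 0)
 *             return -1;
 *     ... program the backend with vring.desc / vring.avail / vring.used
 *         (or vring.desc_packed for packed rings), vring.size and the
 *         vring.kickfd / vring.callfd eventfds ...
 */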
969
970 int
971 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
972                                   struct rte_vhost_ring_inflight *vring)
973 {
974         struct virtio_net *dev;
975         struct vhost_virtqueue *vq;
976
977         dev = get_device(vid);
978         if (unlikely(!dev))
979                 return -1;
980
981         if (vring_idx >= VHOST_MAX_VRING)
982                 return -1;
983
984         vq = dev->virtqueue[vring_idx];
985         if (unlikely(!vq))
986                 return -1;
987
988         if (vq_is_packed(dev)) {
989                 if (unlikely(!vq->inflight_packed))
990                         return -1;
991
992                 vring->inflight_packed = vq->inflight_packed;
993         } else {
994                 if (unlikely(!vq->inflight_split))
995                         return -1;
996
997                 vring->inflight_split = vq->inflight_split;
998         }
999
1000         vring->resubmit_inflight = vq->resubmit_inflight;
1001
1002         return 0;
1003 }
1004
1005 int
1006 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1007                                   uint16_t idx)
1008 {
1009         struct vhost_virtqueue *vq;
1010         struct virtio_net *dev;
1011
1012         dev = get_device(vid);
1013         if (unlikely(!dev))
1014                 return -1;
1015
1016         if (unlikely(!(dev->protocol_features &
1017             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1018                 return 0;
1019
1020         if (unlikely(vq_is_packed(dev)))
1021                 return -1;
1022
1023         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1024                 return -1;
1025
1026         vq = dev->virtqueue[vring_idx];
1027         if (unlikely(!vq))
1028                 return -1;
1029
1030         if (unlikely(!vq->inflight_split))
1031                 return -1;
1032
1033         if (unlikely(idx >= vq->size))
1034                 return -1;
1035
1036         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1037         vq->inflight_split->desc[idx].inflight = 1;
1038         return 0;
1039 }
1040
1041 int
1042 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1043                                    uint16_t head, uint16_t last,
1044                                    uint16_t *inflight_entry)
1045 {
1046         struct rte_vhost_inflight_info_packed *inflight_info;
1047         struct virtio_net *dev;
1048         struct vhost_virtqueue *vq;
1049         struct vring_packed_desc *desc;
1050         uint16_t old_free_head, free_head;
1051
1052         dev = get_device(vid);
1053         if (unlikely(!dev))
1054                 return -1;
1055
1056         if (unlikely(!(dev->protocol_features &
1057             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1058                 return 0;
1059
1060         if (unlikely(!vq_is_packed(dev)))
1061                 return -1;
1062
1063         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1064                 return -1;
1065
1066         vq = dev->virtqueue[vring_idx];
1067         if (unlikely(!vq))
1068                 return -1;
1069
1070         inflight_info = vq->inflight_packed;
1071         if (unlikely(!inflight_info))
1072                 return -1;
1073
1074         if (unlikely(head >= vq->size))
1075                 return -1;
1076
1077         desc = vq->desc_packed;
1078         old_free_head = inflight_info->old_free_head;
1079         if (unlikely(old_free_head >= vq->size))
1080                 return -1;
1081
1082         free_head = old_free_head;
1083
1084         /* init header descriptor */
1085         inflight_info->desc[old_free_head].num = 0;
1086         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1087         inflight_info->desc[old_free_head].inflight = 1;
1088
1089         /* save the descriptor chain in the inflight entry */
1090         while (head != ((last + 1) % vq->size)) {
1091                 inflight_info->desc[old_free_head].num++;
1092                 inflight_info->desc[free_head].addr = desc[head].addr;
1093                 inflight_info->desc[free_head].len = desc[head].len;
1094                 inflight_info->desc[free_head].flags = desc[head].flags;
1095                 inflight_info->desc[free_head].id = desc[head].id;
1096
1097                 inflight_info->desc[old_free_head].last = free_head;
1098                 free_head = inflight_info->desc[free_head].next;
1099                 inflight_info->free_head = free_head;
1100                 head = (head + 1) % vq->size;
1101         }
1102
1103         inflight_info->old_free_head = free_head;
1104         *inflight_entry = old_free_head;
1105
1106         return 0;
1107 }
1108
1109 int
1110 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1111                                   uint16_t last_used_idx, uint16_t idx)
1112 {
1113         struct virtio_net *dev;
1114         struct vhost_virtqueue *vq;
1115
1116         dev = get_device(vid);
1117         if (unlikely(!dev))
1118                 return -1;
1119
1120         if (unlikely(!(dev->protocol_features &
1121             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1122                 return 0;
1123
1124         if (unlikely(vq_is_packed(dev)))
1125                 return -1;
1126
1127         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1128                 return -1;
1129
1130         vq = dev->virtqueue[vring_idx];
1131         if (unlikely(!vq))
1132                 return -1;
1133
1134         if (unlikely(!vq->inflight_split))
1135                 return -1;
1136
1137         if (unlikely(idx >= vq->size))
1138                 return -1;
1139
1140         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1141
1142         vq->inflight_split->desc[idx].inflight = 0;
1143
1144         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1145
1146         vq->inflight_split->used_idx = last_used_idx;
1147         return 0;
1148 }
1149
1150 int
1151 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1152                                    uint16_t head)
1153 {
1154         struct rte_vhost_inflight_info_packed *inflight_info;
1155         struct virtio_net *dev;
1156         struct vhost_virtqueue *vq;
1157
1158         dev = get_device(vid);
1159         if (unlikely(!dev))
1160                 return -1;
1161
1162         if (unlikely(!(dev->protocol_features &
1163             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1164                 return 0;
1165
1166         if (unlikely(!vq_is_packed(dev)))
1167                 return -1;
1168
1169         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1170                 return -1;
1171
1172         vq = dev->virtqueue[vring_idx];
1173         if (unlikely(!vq))
1174                 return -1;
1175
1176         inflight_info = vq->inflight_packed;
1177         if (unlikely(!inflight_info))
1178                 return -1;
1179
1180         if (unlikely(head >= vq->size))
1181                 return -1;
1182
1183         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1184
1185         inflight_info->desc[head].inflight = 0;
1186
1187         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1188
1189         inflight_info->old_free_head = inflight_info->free_head;
1190         inflight_info->old_used_idx = inflight_info->used_idx;
1191         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1192
1193         return 0;
1194 }
1195
1196 int
1197 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1198                                      uint16_t idx)
1199 {
1200         struct virtio_net *dev;
1201         struct vhost_virtqueue *vq;
1202
1203         dev = get_device(vid);
1204         if (unlikely(!dev))
1205                 return -1;
1206
1207         if (unlikely(!(dev->protocol_features &
1208             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1209                 return 0;
1210
1211         if (unlikely(vq_is_packed(dev)))
1212                 return -1;
1213
1214         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1215                 return -1;
1216
1217         vq = dev->virtqueue[vring_idx];
1218         if (unlikely(!vq))
1219                 return -1;
1220
1221         if (unlikely(!vq->inflight_split))
1222                 return -1;
1223
1224         if (unlikely(idx >= vq->size))
1225                 return -1;
1226
1227         vq->inflight_split->last_inflight_io = idx;
1228         return 0;
1229 }
1230
1231 int
1232 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1233                                       uint16_t head)
1234 {
1235         struct rte_vhost_inflight_info_packed *inflight_info;
1236         struct virtio_net *dev;
1237         struct vhost_virtqueue *vq;
1238         uint16_t last;
1239
1240         dev = get_device(vid);
1241         if (unlikely(!dev))
1242                 return -1;
1243
1244         if (unlikely(!(dev->protocol_features &
1245             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1246                 return 0;
1247
1248         if (unlikely(!vq_is_packed(dev)))
1249                 return -1;
1250
1251         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1252                 return -1;
1253
1254         vq = dev->virtqueue[vring_idx];
1255         if (unlikely(!vq))
1256                 return -1;
1257
1258         inflight_info = vq->inflight_packed;
1259         if (unlikely(!inflight_info))
1260                 return -1;
1261
1262         if (unlikely(head >= vq->size))
1263                 return -1;
1264
1265         last = inflight_info->desc[head].last;
1266         if (unlikely(last >= vq->size))
1267                 return -1;
1268
1269         inflight_info->desc[last].next = inflight_info->free_head;
1270         inflight_info->free_head = head;
1271         inflight_info->used_idx += inflight_info->desc[head].num;
1272         if (inflight_info->used_idx >= inflight_info->desc_num) {
1273                 inflight_info->used_idx -= inflight_info->desc_num;
1274                 inflight_info->used_wrap_counter =
1275                         !inflight_info->used_wrap_counter;
1276         }
1277
1278         return 0;
1279 }
1280
1281 int
1282 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1283 {
1284         struct virtio_net *dev;
1285         struct vhost_virtqueue *vq;
1286
1287         dev = get_device(vid);
1288         if (!dev)
1289                 return -1;
1290
1291         if (vring_idx >= VHOST_MAX_VRING)
1292                 return -1;
1293
1294         vq = dev->virtqueue[vring_idx];
1295         if (!vq)
1296                 return -1;
1297
1298         if (vq_is_packed(dev))
1299                 vhost_vring_call_packed(dev, vq);
1300         else
1301                 vhost_vring_call_split(dev, vq);
1302
1303         return 0;
1304 }
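
/*
 * Usage example (illustrative): an external backend that updates the used
 * ring for queue "qid" notifies the guest through the call fd afterwards:
 *
 *     ... place completed buffers in the used ring of queue qid ...
 *     rte_vhost_vring_call(vid, qid);
 */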
1305
1306 uint16_t
1307 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1308 {
1309         struct virtio_net *dev;
1310         struct vhost_virtqueue *vq;
1311         uint16_t ret = 0;
1312
1313         dev = get_device(vid);
1314         if (!dev)
1315                 return 0;
1316
1317         if (queue_id >= VHOST_MAX_VRING)
1318                 return 0;
1319
1320         vq = dev->virtqueue[queue_id];
1321         if (!vq)
1322                 return 0;
1323
1324         rte_spinlock_lock(&vq->access_lock);
1325
1326         if (unlikely(!vq->enabled || vq->avail == NULL))
1327                 goto out;
1328
1329         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1330
1331 out:
1332         rte_spinlock_unlock(&vq->access_lock);
1333         return ret;
1334 }
1335
1336 static inline int
1337 vhost_enable_notify_split(struct virtio_net *dev,
1338                 struct vhost_virtqueue *vq, int enable)
1339 {
1340         if (vq->used == NULL)
1341                 return -1;
1342
1343         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1344                 if (enable)
1345                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1346                 else
1347                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1348         } else {
1349                 if (enable)
1350                         vhost_avail_event(vq) = vq->last_avail_idx;
1351         }
1352         return 0;
1353 }
1354
1355 static inline int
1356 vhost_enable_notify_packed(struct virtio_net *dev,
1357                 struct vhost_virtqueue *vq, int enable)
1358 {
1359         uint16_t flags;
1360
1361         if (vq->device_event == NULL)
1362                 return -1;
1363
1364         if (!enable) {
1365                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1366                 return 0;
1367         }
1368
1369         flags = VRING_EVENT_F_ENABLE;
1370         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1371                 flags = VRING_EVENT_F_DESC;
1372                 vq->device_event->off_wrap = vq->last_avail_idx |
1373                         vq->avail_wrap_counter << 15;
1374         }
1375
1376         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1377
1378         vq->device_event->flags = flags;
1379         return 0;
1380 }
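
/*
 * Note (illustrative): with VIRTIO_RING_F_EVENT_IDX negotiated, "flags" is
 * set to VRING_EVENT_F_DESC and "off_wrap" encodes the descriptor index at
 * which the guest should notify, with the avail wrap counter in bit 15:
 *
 *     off_wrap = last_avail_idx | (avail_wrap_counter << 15)
 *
 * The release fence before the flags store ensures off_wrap is visible to
 * the guest before the new notification mode is enabled.
 */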
1381
1382 int
1383 vhost_enable_guest_notification(struct virtio_net *dev,
1384                 struct vhost_virtqueue *vq, int enable)
1385 {
1386         /*
1387          * If the virtqueue is not ready yet, the setting will be
1388          * applied when it becomes ready.
1389          */
1390         if (!vq->ready)
1391                 return 0;
1392
1393         if (vq_is_packed(dev))
1394                 return vhost_enable_notify_packed(dev, vq, enable);
1395         else
1396                 return vhost_enable_notify_split(dev, vq, enable);
1397 }
1398
1399 int
1400 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1401 {
1402         struct virtio_net *dev = get_device(vid);
1403         struct vhost_virtqueue *vq;
1404         int ret;
1405
1406         if (!dev)
1407                 return -1;
1408
1409         if (queue_id >= VHOST_MAX_VRING)
1410                 return -1;
1411
1412         vq = dev->virtqueue[queue_id];
1413         if (!vq)
1414                 return -1;
1415
1416         rte_spinlock_lock(&vq->access_lock);
1417
1418         vq->notif_enable = enable;
1419         ret = vhost_enable_guest_notification(dev, vq, enable);
1420
1421         rte_spinlock_unlock(&vq->access_lock);
1422
1423         return ret;
1424 }
1425
1426 void
1427 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1428 {
1429         struct virtio_net *dev = get_device(vid);
1430
1431         if (dev == NULL)
1432                 return;
1433
1434         vhost_log_write(dev, addr, len);
1435 }
1436
1437 void
1438 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1439                          uint64_t offset, uint64_t len)
1440 {
1441         struct virtio_net *dev;
1442         struct vhost_virtqueue *vq;
1443
1444         dev = get_device(vid);
1445         if (dev == NULL)
1446                 return;
1447
1448         if (vring_idx >= VHOST_MAX_VRING)
1449                 return;
1450         vq = dev->virtqueue[vring_idx];
1451         if (!vq)
1452                 return;
1453
1454         vhost_log_used_vring(dev, vq, offset, len);
1455 }
1456
1457 uint32_t
1458 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1459 {
1460         struct virtio_net *dev;
1461         struct vhost_virtqueue *vq;
1462         uint32_t ret = 0;
1463
1464         dev = get_device(vid);
1465         if (dev == NULL)
1466                 return 0;
1467
1468         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1469                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1470                         dev->ifname, __func__, qid);
1471                 return 0;
1472         }
1473
1474         vq = dev->virtqueue[qid];
1475         if (vq == NULL)
1476                 return 0;
1477
1478         rte_spinlock_lock(&vq->access_lock);
1479
1480         if (unlikely(!vq->enabled || vq->avail == NULL))
1481                 goto out;
1482
1483         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1484
1485 out:
1486         rte_spinlock_unlock(&vq->access_lock);
1487         return ret;
1488 }
1489
1490 struct rte_vdpa_device *
1491 rte_vhost_get_vdpa_device(int vid)
1492 {
1493         struct virtio_net *dev = get_device(vid);
1494
1495         if (dev == NULL)
1496                 return NULL;
1497
1498         return dev->vdpa_dev;
1499 }
1500
1501 int
1502 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1503                 uint64_t *log_size)
1504 {
1505         struct virtio_net *dev = get_device(vid);
1506
1507         if (dev == NULL || log_base == NULL || log_size == NULL)
1508                 return -1;
1509
1510         *log_base = dev->log_base;
1511         *log_size = dev->log_size;
1512
1513         return 0;
1514 }
1515
1516 int
1517 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1518                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1519 {
1520         struct vhost_virtqueue *vq;
1521         struct virtio_net *dev = get_device(vid);
1522
1523         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1524                 return -1;
1525
1526         if (queue_id >= VHOST_MAX_VRING)
1527                 return -1;
1528
1529         vq = dev->virtqueue[queue_id];
1530         if (!vq)
1531                 return -1;
1532
1533         if (vq_is_packed(dev)) {
1534                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1535                                   vq->last_avail_idx;
1536                 *last_used_idx = (vq->used_wrap_counter << 15) |
1537                                  vq->last_used_idx;
1538         } else {
1539                 *last_avail_idx = vq->last_avail_idx;
1540                 *last_used_idx = vq->last_used_idx;
1541         }
1542
1543         return 0;
1544 }
1545
1546 int
1547 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1548                 uint16_t last_avail_idx, uint16_t last_used_idx)
1549 {
1550         struct vhost_virtqueue *vq;
1551         struct virtio_net *dev = get_device(vid);
1552
1553         if (!dev)
1554                 return -1;
1555
1556         if (queue_id >= VHOST_MAX_VRING)
1557                 return -1;
1558
1559         vq = dev->virtqueue[queue_id];
1560         if (!vq)
1561                 return -1;
1562
1563         if (vq_is_packed(dev)) {
1564                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1565                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1566                 vq->last_used_idx = last_used_idx & 0x7fff;
1567                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1568         } else {
1569                 vq->last_avail_idx = last_avail_idx;
1570                 vq->last_used_idx = last_used_idx;
1571         }
1572
1573         return 0;
1574 }
1575
1576 int
1577 rte_vhost_get_vring_base_from_inflight(int vid,
1578                                        uint16_t queue_id,
1579                                        uint16_t *last_avail_idx,
1580                                        uint16_t *last_used_idx)
1581 {
1582         struct rte_vhost_inflight_info_packed *inflight_info;
1583         struct vhost_virtqueue *vq;
1584         struct virtio_net *dev = get_device(vid);
1585
1586         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1587                 return -1;
1588
1589         if (queue_id >= VHOST_MAX_VRING)
1590                 return -1;
1591
1592         vq = dev->virtqueue[queue_id];
1593         if (!vq)
1594                 return -1;
1595
1596         if (!vq_is_packed(dev))
1597                 return -1;
1598
1599         inflight_info = vq->inflight_packed;
1600         if (!inflight_info)
1601                 return -1;
1602
1603         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1604                           inflight_info->old_used_idx;
1605         *last_used_idx = *last_avail_idx;
1606
1607         return 0;
1608 }
1609
1610 int
1611 rte_vhost_extern_callback_register(int vid,
1612                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1613 {
1614         struct virtio_net *dev = get_device(vid);
1615
1616         if (dev == NULL || ops == NULL)
1617                 return -1;
1618
1619         dev->extern_ops = *ops;
1620         dev->extern_data = ctx;
1621         return 0;
1622 }
1623
1624 static __rte_always_inline int
1625 async_channel_register(int vid, uint16_t queue_id)
1626 {
1627         struct virtio_net *dev = get_device(vid);
1628         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1629         struct vhost_async *async;
1630         int node = vq->numa_node;
1631
1632         if (unlikely(vq->async)) {
1633                 VHOST_LOG_CONFIG(ERR,
1634                                 "(%s) async register failed: already registered (qid: %d)\n",
1635                                 dev->ifname, queue_id);
1636                 return -1;
1637         }
1638
1639         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1640         if (!async) {
1641                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1642                                 dev->ifname, queue_id);
1643                 return -1;
1644         }
1645
1646         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1647                         RTE_CACHE_LINE_SIZE, node);
1648         if (!async->pkts_info) {
1649                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1650                                 dev->ifname, queue_id);
1651                 goto out_free_async;
1652         }
1653
1654         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1655                         RTE_CACHE_LINE_SIZE, node);
1656         if (!async->pkts_cmpl_flag) {
1657                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1658                                 dev->ifname, queue_id);
1659                 goto out_free_async;
1660         }
1661
1662         if (vq_is_packed(dev)) {
1663                 async->buffers_packed = rte_malloc_socket(NULL,
1664                                 vq->size * sizeof(struct vring_used_elem_packed),
1665                                 RTE_CACHE_LINE_SIZE, node);
1666                 if (!async->buffers_packed) {
1667                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1668                                         dev->ifname, queue_id);
1669                         goto out_free_inflight;
1670                 }
1671         } else {
1672                 async->descs_split = rte_malloc_socket(NULL,
1673                                 vq->size * sizeof(struct vring_used_elem),
1674                                 RTE_CACHE_LINE_SIZE, node);
1675                 if (!async->descs_split) {
1676                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1677                                         dev->ifname, queue_id);
1678                         goto out_free_inflight;
1679                 }
1680         }
1681
1682         vq->async = async;
1683
1684         return 0;
1685 out_free_inflight:
1686         rte_free(async->pkts_info);
1687 out_free_async:
1688         rte_free(async);
1689
1690         return -1;
1691 }
1692
1693 int
1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1695 {
1696         struct vhost_virtqueue *vq;
1697         struct virtio_net *dev = get_device(vid);
1698         int ret;
1699
1700         if (dev == NULL)
1701                 return -1;
1702
1703         if (queue_id >= VHOST_MAX_VRING)
1704                 return -1;
1705
1706         vq = dev->virtqueue[queue_id];
1707
1708         if (unlikely(vq == NULL || !dev->async_copy))
1709                 return -1;
1710
1711         rte_spinlock_lock(&vq->access_lock);
1712         ret = async_channel_register(vid, queue_id);
1713         rte_spinlock_unlock(&vq->access_lock);
1714
1715         return ret;
1716 }
1717
1718 int
1719 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1720 {
1721         struct vhost_virtqueue *vq;
1722         struct virtio_net *dev = get_device(vid);
1723
1724         if (dev == NULL)
1725                 return -1;
1726
1727         if (queue_id >= VHOST_MAX_VRING)
1728                 return -1;
1729
1730         vq = dev->virtqueue[queue_id];
1731
1732         if (unlikely(vq == NULL || !dev->async_copy))
1733                 return -1;
1734
1735         return async_channel_register(vid, queue_id);
1736 }
1737
1738 int
1739 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1740 {
1741         struct vhost_virtqueue *vq;
1742         struct virtio_net *dev = get_device(vid);
1743         int ret = -1;
1744
1745         if (dev == NULL)
1746                 return ret;
1747
1748         if (queue_id >= VHOST_MAX_VRING)
1749                 return ret;
1750
1751         vq = dev->virtqueue[queue_id];
1752
1753         if (vq == NULL)
1754                 return ret;
1755
1756         ret = 0;
1757
1758         if (!vq->async)
1759                 return ret;
1760
1761         if (!rte_spinlock_trylock(&vq->access_lock)) {
1762                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1763                                 dev->ifname);
1764                 return -1;
1765         }
1766
1767         if (vq->async->pkts_inflight_n) {
1768                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1769                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1770                         dev->ifname);
1771                 ret = -1;
1772                 goto out;
1773         }
1774
1775         vhost_free_async_mem(vq);
1776 out:
1777         rte_spinlock_unlock(&vq->access_lock);
1778
1779         return ret;
1780 }
1781
1782 int
1783 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1784 {
1785         struct vhost_virtqueue *vq;
1786         struct virtio_net *dev = get_device(vid);
1787
1788         if (dev == NULL)
1789                 return -1;
1790
1791         if (queue_id >= VHOST_MAX_VRING)
1792                 return -1;
1793
1794         vq = dev->virtqueue[queue_id];
1795
1796         if (vq == NULL)
1797                 return -1;
1798
1799         if (!vq->async)
1800                 return 0;
1801
1802         if (vq->async->pkts_inflight_n) {
1803                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1804                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1805                         dev->ifname);
1806                 return -1;
1807         }
1808
1809         vhost_free_async_mem(vq);
1810
1811         return 0;
1812 }
1813
1814 int
1815 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1816 {
1817         struct rte_dma_info info;
1818         void *pkts_cmpl_flag_addr;
1819         uint16_t max_desc;
1820
1821         if (!rte_dma_is_valid(dma_id)) {
1822                 VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
1823                 return -1;
1824         }
1825
1826         rte_dma_info_get(dma_id, &info);
1827         if (vchan_id >= info.max_vchans) {
1828                 VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1829                 return -1;
1830         }
1831
1832         if (!dma_copy_track[dma_id].vchans) {
1833                 struct async_dma_vchan_info *vchans;
1834
1835                 vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1836                                 RTE_CACHE_LINE_SIZE);
1837                 if (vchans == NULL) {
1838                         VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
1839                                         dma_id, vchan_id);
1840                         return -1;
1841                 }
1842
1843                 dma_copy_track[dma_id].vchans = vchans;
1844         }
1845
1846         if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1847                 VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
1848                                 vchan_id);
1849                 return 0;
1850         }
1851
1852         max_desc = info.max_desc;
1853         if (!rte_is_power_of_2(max_desc))
1854                 max_desc = rte_align32pow2(max_desc);
1855
1856         pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1857         if (!pkts_cmpl_flag_addr) {
1858                 VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
1859                                 "vChannel %u.\n", dma_id, vchan_id);
1860
1861                 if (dma_copy_track[dma_id].nr_vchans == 0) {
1862                         rte_free(dma_copy_track[dma_id].vchans);
1863                         dma_copy_track[dma_id].vchans = NULL;
1864                 }
1865                 return -1;
1866         }
1867
1868         dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1869         dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1870         dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1871         dma_copy_track[dma_id].nr_vchans++;
1872
1873         return 0;
1874 }
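
/*
 * Usage sketch (illustrative): an application enabling the asynchronous
 * data path would typically configure the DMA vChannel once, then register
 * the async channel on a virtqueue of a device created with the
 * RTE_VHOST_USER_ASYNC_COPY flag:
 *
 *     if (rte_vhost_async_dma_configure(dma_id, vchan_id) < 0)
 *             return -1;
 *     if (rte_vhost_async_channel_register(vid, queue_id) < 0)
 *             return -1;
 *     ... then use the async enqueue/dequeue APIs from rte_vhost_async.h ...
 */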
1875
1876 int
1877 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1878 {
1879         struct vhost_virtqueue *vq;
1880         struct virtio_net *dev = get_device(vid);
1881         int ret = -1;
1882
1883         if (dev == NULL)
1884                 return ret;
1885
1886         if (queue_id >= VHOST_MAX_VRING)
1887                 return ret;
1888
1889         vq = dev->virtqueue[queue_id];
1890
1891         if (vq == NULL)
1892                 return ret;
1893
1894         if (!vq->async)
1895                 return ret;
1896
1897         if (!rte_spinlock_trylock(&vq->access_lock)) {
1898                 VHOST_LOG_CONFIG(DEBUG,
1899                         "(%s) failed to check in-flight packets. virtqueue busy.\n",
1900                         dev->ifname);
1901                 return ret;
1902         }
1903
1904         ret = vq->async->pkts_inflight_n;
1905         rte_spinlock_unlock(&vq->access_lock);
1906
1907         return ret;
1908 }
1909
1910 int
1911 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1912                 struct rte_vhost_power_monitor_cond *pmc)
1913 {
1914         struct virtio_net *dev = get_device(vid);
1915         struct vhost_virtqueue *vq;
1916
1917         if (dev == NULL)
1918                 return -1;
1919         if (queue_id >= VHOST_MAX_VRING)
1920                 return -1;
1921
1922         vq = dev->virtqueue[queue_id];
1923         if (vq == NULL)
1924                 return -1;
1925
1926         if (vq_is_packed(dev)) {
1927                 struct vring_packed_desc *desc;
1928                 desc = vq->desc_packed;
1929                 pmc->addr = &desc[vq->last_avail_idx].flags;
1930                 if (vq->avail_wrap_counter)
1931                         pmc->val = VRING_DESC_F_AVAIL;
1932                 else
1933                         pmc->val = VRING_DESC_F_USED;
1934                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1935                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1936                 pmc->match = 1;
1937         } else {
1938                 pmc->addr = &vq->avail->idx;
1939                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1940                 pmc->mask = vq->size - 1;
1941                 pmc->size = sizeof(vq->avail->idx);
1942                 pmc->match = 0;
1943         }
1944
1945         return 0;
1946 }
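
/*
 * Usage sketch (illustrative): a polling driver can turn the returned
 * condition into a power-monitor wait, i.e. sleep until the guest posts a
 * new descriptor:
 *
 *     struct rte_vhost_power_monitor_cond pmc;
 *
 *     if (rte_vhost_get_monitor_addr(vid, qid, &pmc) == 0)
 *             ... wait until the pmc.size-byte value at pmc.addr, masked
 *                 with pmc.mask, matches pmc.val (pmc.match == 1) or
 *                 differs from it (pmc.match == 0), for example via the
 *                 rte_power_monitor() intrinsics ...
 */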
1947
1948 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1949 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);