lib/librte_vhost/vhost.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
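/*
 * Table of all vhost devices, indexed by vid. Slot allocation in
 * vhost_new_device() is serialized with vhost_dev_lock.
 */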
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
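/*
 * Translate guest I/O virtual address @iova to a host virtual address
 * using the IOTLB cache. Returns the host virtual address if the whole
 * [iova, iova + *size) range is covered by the cache; otherwise an IOTLB
 * miss request is sent to the frontend (temporarily releasing iotlb_lock)
 * and 0 is returned.
 */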
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_CONFIG(ERR,
62                                 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63                                 iova);
64                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65                 }
66
67                 vhost_user_iotlb_rd_lock(vq);
68         }
69
70         return 0;
71 }
72
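/* Dirty page logging granularity: one bit per 4 KiB page of guest memory. */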
73 #define VHOST_LOG_PAGE  4096
74
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82         /*
83          * __sync_ built-ins are deprecated, but __atomic_ ones
84          * are not as well optimized in older GCC versions.
85          */
86         __sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91
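/* Mark @page dirty in the log bitmap (8 pages per byte). */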
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95         vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
97
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101         uint64_t page;
102
103         if (unlikely(!dev->log_base || !len))
104                 return;
105
106         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107                 return;
108
109         /* To make sure guest memory updates are committed before logging */
110         rte_atomic_thread_fence(__ATOMIC_RELEASE);
111
112         page = addr / VHOST_LOG_PAGE;
113         while (page * VHOST_LOG_PAGE < addr + len) {
114                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115                 page += 1;
116         }
117 }
118
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121                              uint64_t iova, uint64_t len)
122 {
123         uint64_t hva, gpa, map_len;
124         map_len = len;
125
126         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127         if (map_len != len) {
128                 VHOST_LOG_DATA(ERR,
129                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130                         iova);
131                 return;
132         }
133
134         gpa = hva_to_gpa(dev, hva, len);
135         if (gpa)
136                 __vhost_log_write(dev, gpa, len);
137 }
138
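/*
 * Flush the per-virtqueue dirty log cache into the shared log bitmap,
 * OR-ing each cached entry into place, then reset the cache.
 */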
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142         unsigned long *log_base;
143         int i;
144
145         if (unlikely(!dev->log_base))
146                 return;
147
148         /* No cache, nothing to sync */
149         if (unlikely(!vq->log_cache))
150                 return;
151
152         rte_atomic_thread_fence(__ATOMIC_RELEASE);
153
154         log_base = (unsigned long *)(uintptr_t)dev->log_base;
155
156         for (i = 0; i < vq->log_cache_nb_elem; i++) {
157                 struct log_cache_entry *elem = vq->log_cache + i;
158
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160                 /*
161                  * '__sync' builtins are deprecated, but '__atomic' ones
162                  * are not as well optimized in older GCC versions.
163                  */
164                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166                 __atomic_fetch_or(log_base + elem->offset, elem->val,
167                                 __ATOMIC_RELAXED);
168 #endif
169         }
170
171         rte_atomic_thread_fence(__ATOMIC_RELEASE);
172
173         vq->log_cache_nb_elem = 0;
174 }
175
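/*
 * Record @page as dirty in the per-virtqueue log cache, falling back to
 * writing the shared log bitmap directly when no cache is allocated or
 * the cache is full.
 */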
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178                         uint64_t page)
179 {
180         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181         uint32_t offset = page / (sizeof(unsigned long) << 3);
182         int i;
183
184         if (unlikely(!vq->log_cache)) {
185                 /* No logging cache allocated, write dirty log map directly */
186                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
187                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188
189                 return;
190         }
191
192         for (i = 0; i < vq->log_cache_nb_elem; i++) {
193                 struct log_cache_entry *elem = vq->log_cache + i;
194
195                 if (elem->offset == offset) {
196                         elem->val |= (1UL << bit_nr);
197                         return;
198                 }
199         }
200
201         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202                 /*
203                  * No more room for a new log cache entry,
204                  * so write the dirty log map directly.
205                  */
206                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
207                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208
209                 return;
210         }
211
212         vq->log_cache[i].offset = offset;
213         vq->log_cache[i].val = (1UL << bit_nr);
214         vq->log_cache_nb_elem++;
215 }
216
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219                         uint64_t addr, uint64_t len)
220 {
221         uint64_t page;
222
223         if (unlikely(!dev->log_base || !len))
224                 return;
225
226         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227                 return;
228
229         page = addr / VHOST_LOG_PAGE;
230         while (page * VHOST_LOG_PAGE < addr + len) {
231                 vhost_log_cache_page(dev, vq, page);
232                 page += 1;
233         }
234 }
235
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238                              uint64_t iova, uint64_t len)
239 {
240         uint64_t hva, gpa, map_len;
241         map_len = len;
242
243         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244         if (map_len != len) {
245                 VHOST_LOG_DATA(ERR,
246                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247                         iova);
248                 return;
249         }
250
251         gpa = hva_to_gpa(dev, hva, len);
252         if (gpa)
253                 __vhost_log_cache_write(dev, vq, gpa, len);
254 }
255
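/*
 * Allocate a contiguous local copy of an indirect descriptor table,
 * translating and copying it chunk by chunk since the guest table may
 * span several host mappings. The caller must release the returned
 * buffer with rte_free(). Returns NULL on allocation or translation
 * failure.
 */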
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258                 uint64_t desc_addr, uint64_t desc_len)
259 {
260         void *idesc;
261         uint64_t src, dst;
262         uint64_t len, remain = desc_len;
263
264         idesc = rte_malloc(__func__, desc_len, 0);
265         if (unlikely(!idesc))
266                 return NULL;
267
268         dst = (uint64_t)(uintptr_t)idesc;
269
270         while (remain) {
271                 len = remain;
272                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273                                 VHOST_ACCESS_RO);
274                 if (unlikely(!src || !len)) {
275                         rte_free(idesc);
276                         return NULL;
277                 }
278
279                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280
281                 remain -= len;
282                 dst += len;
283                 desc_addr += len;
284         }
285
286         return idesc;
287 }
288
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292         if ((vq->callfd >= 0) && (destroy != 0))
293                 close(vq->callfd);
294         if (vq->kickfd >= 0)
295                 close(vq->kickfd);
296 }
297
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301         if (!(dev->protocol_features &
302             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303                 return;
304
305         if (vq_is_packed(dev)) {
306                 if (vq->inflight_packed)
307                         vq->inflight_packed = NULL;
308         } else {
309                 if (vq->inflight_split)
310                         vq->inflight_split = NULL;
311         }
312
313         if (vq->resubmit_inflight) {
314                 if (vq->resubmit_inflight->resubmit_list) {
315                         free(vq->resubmit_inflight->resubmit_list);
316                         vq->resubmit_inflight->resubmit_list = NULL;
317                 }
318                 free(vq->resubmit_inflight);
319                 vq->resubmit_inflight = NULL;
320         }
321 }
322
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330         uint32_t i;
331
332         vhost_backend_cleanup(dev);
333
334         for (i = 0; i < dev->nr_vring; i++) {
335                 cleanup_vq(dev->virtqueue[i], destroy);
336                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
337         }
338 }
339
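/* Release all async-copy metadata buffers attached to the virtqueue. */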
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343         if (vq->async_pkts_info)
344                 rte_free(vq->async_pkts_info);
345         if (vq->async_descs_split)
346                 rte_free(vq->async_descs_split);
347         if (vq->it_pool)
348                 rte_free(vq->it_pool);
349         if (vq->vec_pool)
350                 rte_free(vq->vec_pool);
351
352         vq->async_pkts_info = NULL;
353         vq->async_descs_split = NULL;
354         vq->it_pool = NULL;
355         vq->vec_pool = NULL;
356 }
357
358 void
359 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
360 {
361         if (vq_is_packed(dev))
362                 rte_free(vq->shadow_used_packed);
363         else {
364                 rte_free(vq->shadow_used_split);
365                 vhost_free_async_mem(vq);
366         }
367         rte_free(vq->batch_copy_elems);
368         if (vq->iotlb_pool)
369                 rte_mempool_free(vq->iotlb_pool);
370         rte_free(vq->log_cache);
371         rte_free(vq);
372 }
373
374 /*
375  * Release virtqueues and device memory.
376  */
377 static void
378 free_device(struct virtio_net *dev)
379 {
380         uint32_t i;
381
382         for (i = 0; i < dev->nr_vring; i++)
383                 free_vq(dev, dev->virtqueue[i]);
384
385         rte_free(dev);
386 }
387
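/*
 * Translate the vring's log address to a guest physical address when
 * dirty logging is enabled for this ring (VHOST_VRING_F_LOG). Returns 0
 * on success, -1 if the address could not be translated.
 */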
388 static __rte_always_inline int
389 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
390 {
391         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
392                 return 0;
393
394         vq->log_guest_addr = translate_log_addr(dev, vq,
395                                                 vq->ring_addrs.log_guest_addr);
396         if (vq->log_guest_addr == 0)
397                 return -1;
398
399         return 0;
400 }
401
402 /*
403  * Convert the vring log address to a GPA.
404  * If an IOMMU is in use, the log address is an IOVA.
405  * If no IOMMU is in use, the log address is already a GPA.
406  *
407  * Caller should have iotlb_lock read-locked
408  */
409 uint64_t
410 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
411                 uint64_t log_addr)
412 {
413         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
414                 const uint64_t exp_size = sizeof(uint64_t);
415                 uint64_t hva, gpa;
416                 uint64_t size = exp_size;
417
418                 hva = vhost_iova_to_vva(dev, vq, log_addr,
419                                         &size, VHOST_ACCESS_RW);
420
421                 if (size != exp_size)
422                         return 0;
423
424                 gpa = hva_to_gpa(dev, hva, exp_size);
425                 if (!gpa) {
426                         VHOST_LOG_CONFIG(ERR,
427                                 "VQ: Failed to find GPA for log_addr: 0x%"
428                                 PRIx64 " hva: 0x%" PRIx64 "\n",
429                                 log_addr, hva);
430                         return 0;
431                 }
432                 return gpa;
433
434         } else
435                 return log_addr;
436 }
437
438 /* Caller should have iotlb_lock read-locked */
439 static int
440 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
441 {
442         uint64_t req_size, size;
443
444         req_size = sizeof(struct vring_desc) * vq->size;
445         size = req_size;
446         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
447                                                 vq->ring_addrs.desc_user_addr,
448                                                 &size, VHOST_ACCESS_RW);
449         if (!vq->desc || size != req_size)
450                 return -1;
451
452         req_size = sizeof(struct vring_avail);
453         req_size += sizeof(uint16_t) * vq->size;
454         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
455                 req_size += sizeof(uint16_t);
456         size = req_size;
457         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
458                                                 vq->ring_addrs.avail_user_addr,
459                                                 &size, VHOST_ACCESS_RW);
460         if (!vq->avail || size != req_size)
461                 return -1;
462
463         req_size = sizeof(struct vring_used);
464         req_size += sizeof(struct vring_used_elem) * vq->size;
465         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
466                 req_size += sizeof(uint16_t);
467         size = req_size;
468         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
469                                                 vq->ring_addrs.used_user_addr,
470                                                 &size, VHOST_ACCESS_RW);
471         if (!vq->used || size != req_size)
472                 return -1;
473
474         return 0;
475 }
476
477 /* Caller should have iotlb_lock read-locked */
478 static int
479 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
480 {
481         uint64_t req_size, size;
482
483         req_size = sizeof(struct vring_packed_desc) * vq->size;
484         size = req_size;
485         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
486                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
487                                 &size, VHOST_ACCESS_RW);
488         if (!vq->desc_packed || size != req_size)
489                 return -1;
490
491         req_size = sizeof(struct vring_packed_desc_event);
492         size = req_size;
493         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
494                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
495                                 &size, VHOST_ACCESS_RW);
496         if (!vq->driver_event || size != req_size)
497                 return -1;
498
499         req_size = sizeof(struct vring_packed_desc_event);
500         size = req_size;
501         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
502                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
503                                 &size, VHOST_ACCESS_RW);
504         if (!vq->device_event || size != req_size)
505                 return -1;
506
507         return 0;
508 }
509
510 int
511 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
512 {
513
514         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
515                 return -1;
516
517         if (vq_is_packed(dev)) {
518                 if (vring_translate_packed(dev, vq) < 0)
519                         return -1;
520         } else {
521                 if (vring_translate_split(dev, vq) < 0)
522                         return -1;
523         }
524
525         if (log_translate(dev, vq) < 0)
526                 return -1;
527
528         vq->access_ok = true;
529
530         return 0;
531 }
532
533 void
534 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
535 {
536         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
537                 vhost_user_iotlb_wr_lock(vq);
538
539         vq->access_ok = false;
540         vq->desc = NULL;
541         vq->avail = NULL;
542         vq->used = NULL;
543         vq->log_guest_addr = 0;
544
545         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
546                 vhost_user_iotlb_wr_unlock(vq);
547 }
548
549 static void
550 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
551 {
552         struct vhost_virtqueue *vq;
553
554         if (vring_idx >= VHOST_MAX_VRING) {
555                 VHOST_LOG_CONFIG(ERR,
556                                 "Failed to init vring, out of bounds (%d)\n",
557                                 vring_idx);
558                 return;
559         }
560
561         vq = dev->virtqueue[vring_idx];
562         if (!vq) {
563                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
564                                 vring_idx);
565                 return;
566         }
567
568         memset(vq, 0, sizeof(struct vhost_virtqueue));
569
570         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
571         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
572         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
573 }
574
575 static void
576 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
577 {
578         struct vhost_virtqueue *vq;
579         int callfd;
580
581         if (vring_idx >= VHOST_MAX_VRING) {
582                 VHOST_LOG_CONFIG(ERR,
583                                 "Failed to reset vring, out of bounds (%d)\n",
584                                 vring_idx);
585                 return;
586         }
587
588         vq = dev->virtqueue[vring_idx];
589         if (!vq) {
590                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
591                                 vring_idx);
592                 return;
593         }
594
595         callfd = vq->callfd;
596         init_vring_queue(dev, vring_idx);
597         vq->callfd = callfd;
598 }
599
600 int
601 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
602 {
603         struct vhost_virtqueue *vq;
604         uint32_t i;
605
606         /* Also allocate holes, if any, up to requested vring index. */
607         for (i = 0; i <= vring_idx; i++) {
608                 if (dev->virtqueue[i])
609                         continue;
610
611                 vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0);
612                 if (vq == NULL) {
613                         VHOST_LOG_CONFIG(ERR,
614                                 "Failed to allocate memory for vring:%u.\n", i);
615                         return -1;
616                 }
617
618                 dev->virtqueue[i] = vq;
619                 init_vring_queue(dev, i);
620                 rte_spinlock_init(&vq->access_lock);
621                 vq->avail_wrap_counter = 1;
622                 vq->used_wrap_counter = 1;
623                 vq->signalled_used_valid = false;
624         }
625
626         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
627
628         return 0;
629 }
630
631 /*
632  * Reset some variables in the device structure, while keeping a few
633  * others untouched, such as vid, ifname and nr_vring: they
634  * should remain the same unless the device is removed.
635  */
636 void
637 reset_device(struct virtio_net *dev)
638 {
639         uint32_t i;
640
641         dev->features = 0;
642         dev->protocol_features = 0;
643         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
644
645         for (i = 0; i < dev->nr_vring; i++)
646                 reset_vring_queue(dev, i);
647 }
648
649 /*
650  * Invoked when a new vhost-user connection is established (i.e. when
651  * a new virtio device is being attached).
652  */
653 int
654 vhost_new_device(void)
655 {
656         struct virtio_net *dev;
657         int i;
658
659         pthread_mutex_lock(&vhost_dev_lock);
660         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
661                 if (vhost_devices[i] == NULL)
662                         break;
663         }
664
665         if (i == MAX_VHOST_DEVICE) {
666                 VHOST_LOG_CONFIG(ERR,
667                         "Failed to find a free slot for new device.\n");
668                 pthread_mutex_unlock(&vhost_dev_lock);
669                 return -1;
670         }
671
672         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
673         if (dev == NULL) {
674                 VHOST_LOG_CONFIG(ERR,
675                         "Failed to allocate memory for new dev.\n");
676                 pthread_mutex_unlock(&vhost_dev_lock);
677                 return -1;
678         }
679
680         vhost_devices[i] = dev;
681         pthread_mutex_unlock(&vhost_dev_lock);
682
683         dev->vid = i;
684         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
685         dev->slave_req_fd = -1;
686         dev->postcopy_ufd = -1;
687         rte_spinlock_init(&dev->slave_req_lock);
688
689         return i;
690 }
691
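/*
 * Notify the vDPA driver (if any) and the application that a running
 * device is being destroyed, and clear the RUNNING flag.
 */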
692 void
693 vhost_destroy_device_notify(struct virtio_net *dev)
694 {
695         struct rte_vdpa_device *vdpa_dev;
696
697         if (dev->flags & VIRTIO_DEV_RUNNING) {
698                 vdpa_dev = dev->vdpa_dev;
699                 if (vdpa_dev)
700                         vdpa_dev->ops->dev_close(dev->vid);
701                 dev->flags &= ~VIRTIO_DEV_RUNNING;
702                 dev->notify_ops->destroy_device(dev->vid);
703         }
704 }
705
706 /*
707  * Invoked when the vhost-user connection is broken (i.e. when
708  * the virtio device is being detached).
709  */
710 void
711 vhost_destroy_device(int vid)
712 {
713         struct virtio_net *dev = get_device(vid);
714
715         if (dev == NULL)
716                 return;
717
718         vhost_destroy_device_notify(dev);
719
720         cleanup_device(dev, 1);
721         free_device(dev);
722
723         vhost_devices[vid] = NULL;
724 }
725
726 void
727 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
728 {
729         struct virtio_net *dev = get_device(vid);
730
731         if (dev == NULL)
732                 return;
733
734         dev->vdpa_dev = vdpa_dev;
735 }
736
737 void
738 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
739 {
740         struct virtio_net *dev;
741         unsigned int len;
742
743         dev = get_device(vid);
744         if (dev == NULL)
745                 return;
746
747         len = if_len > sizeof(dev->ifname) ?
748                 sizeof(dev->ifname) : if_len;
749
750         strncpy(dev->ifname, if_name, len);
751         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
752 }
753
754 void
755 vhost_set_builtin_virtio_net(int vid, bool enable)
756 {
757         struct virtio_net *dev = get_device(vid);
758
759         if (dev == NULL)
760                 return;
761
762         if (enable)
763                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
764         else
765                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
766 }
767
768 void
769 vhost_enable_extbuf(int vid)
770 {
771         struct virtio_net *dev = get_device(vid);
772
773         if (dev == NULL)
774                 return;
775
776         dev->extbuf = 1;
777 }
778
779 void
780 vhost_enable_linearbuf(int vid)
781 {
782         struct virtio_net *dev = get_device(vid);
783
784         if (dev == NULL)
785                 return;
786
787         dev->linearbuf = 1;
788 }
789
790 int
791 rte_vhost_get_mtu(int vid, uint16_t *mtu)
792 {
793         struct virtio_net *dev = get_device(vid);
794
795         if (dev == NULL || mtu == NULL)
796                 return -ENODEV;
797
798         if (!(dev->flags & VIRTIO_DEV_READY))
799                 return -EAGAIN;
800
801         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
802                 return -ENOTSUP;
803
804         *mtu = dev->mtu;
805
806         return 0;
807 }
808
809 int
810 rte_vhost_get_numa_node(int vid)
811 {
812 #ifdef RTE_LIBRTE_VHOST_NUMA
813         struct virtio_net *dev = get_device(vid);
814         int numa_node;
815         int ret;
816
817         if (dev == NULL || numa_available() != 0)
818                 return -1;
819
820         ret = get_mempolicy(&numa_node, NULL, 0, dev,
821                             MPOL_F_NODE | MPOL_F_ADDR);
822         if (ret < 0) {
823                 VHOST_LOG_CONFIG(ERR,
824                         "(%d) failed to query numa node: %s\n",
825                         vid, rte_strerror(errno));
826                 return -1;
827         }
828
829         return numa_node;
830 #else
831         RTE_SET_USED(vid);
832         return -1;
833 #endif
834 }
835
836 uint32_t
837 rte_vhost_get_queue_num(int vid)
838 {
839         struct virtio_net *dev = get_device(vid);
840
841         if (dev == NULL)
842                 return 0;
843
844         return dev->nr_vring / 2;
845 }
846
847 uint16_t
848 rte_vhost_get_vring_num(int vid)
849 {
850         struct virtio_net *dev = get_device(vid);
851
852         if (dev == NULL)
853                 return 0;
854
855         return dev->nr_vring;
856 }
857
858 int
859 rte_vhost_get_ifname(int vid, char *buf, size_t len)
860 {
861         struct virtio_net *dev = get_device(vid);
862
863         if (dev == NULL || buf == NULL)
864                 return -1;
865
866         len = RTE_MIN(len, sizeof(dev->ifname));
867
868         strncpy(buf, dev->ifname, len);
869         buf[len - 1] = '\0';
870
871         return 0;
872 }
873
874 int
875 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
876 {
877         struct virtio_net *dev;
878
879         dev = get_device(vid);
880         if (dev == NULL || features == NULL)
881                 return -1;
882
883         *features = dev->features;
884         return 0;
885 }
886
887 int
888 rte_vhost_get_negotiated_protocol_features(int vid,
889                                            uint64_t *protocol_features)
890 {
891         struct virtio_net *dev;
892
893         dev = get_device(vid);
894         if (dev == NULL || protocol_features == NULL)
895                 return -1;
896
897         *protocol_features = dev->protocol_features;
898         return 0;
899 }
900
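/*
 * The memory table returned through *mem is a snapshot allocated with
 * malloc() and must be released by the caller with free(). A minimal
 * usage sketch (illustrative only):
 *
 *	struct rte_vhost_memory *mem = NULL;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		... inspect mem->regions[0 .. mem->nregions - 1] ...
 *		free(mem);
 *	}
 */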
901 int
902 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
903 {
904         struct virtio_net *dev;
905         struct rte_vhost_memory *m;
906         size_t size;
907
908         dev = get_device(vid);
909         if (dev == NULL || mem == NULL)
910                 return -1;
911
912         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
913         m = malloc(sizeof(struct rte_vhost_memory) + size);
914         if (!m)
915                 return -1;
916
917         m->nregions = dev->mem->nregions;
918         memcpy(m->regions, dev->mem->regions, size);
919         *mem = m;
920
921         return 0;
922 }
923
924 int
925 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
926                           struct rte_vhost_vring *vring)
927 {
928         struct virtio_net *dev;
929         struct vhost_virtqueue *vq;
930
931         dev = get_device(vid);
932         if (dev == NULL || vring == NULL)
933                 return -1;
934
935         if (vring_idx >= VHOST_MAX_VRING)
936                 return -1;
937
938         vq = dev->virtqueue[vring_idx];
939         if (!vq)
940                 return -1;
941
942         if (vq_is_packed(dev)) {
943                 vring->desc_packed = vq->desc_packed;
944                 vring->driver_event = vq->driver_event;
945                 vring->device_event = vq->device_event;
946         } else {
947                 vring->desc = vq->desc;
948                 vring->avail = vq->avail;
949                 vring->used = vq->used;
950         }
951         vring->log_guest_addr  = vq->log_guest_addr;
952
953         vring->callfd  = vq->callfd;
954         vring->kickfd  = vq->kickfd;
955         vring->size    = vq->size;
956
957         return 0;
958 }
959
960 int
961 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
962                                   struct rte_vhost_ring_inflight *vring)
963 {
964         struct virtio_net *dev;
965         struct vhost_virtqueue *vq;
966
967         dev = get_device(vid);
968         if (unlikely(!dev))
969                 return -1;
970
971         if (vring_idx >= VHOST_MAX_VRING)
972                 return -1;
973
974         vq = dev->virtqueue[vring_idx];
975         if (unlikely(!vq))
976                 return -1;
977
978         if (vq_is_packed(dev)) {
979                 if (unlikely(!vq->inflight_packed))
980                         return -1;
981
982                 vring->inflight_packed = vq->inflight_packed;
983         } else {
984                 if (unlikely(!vq->inflight_split))
985                         return -1;
986
987                 vring->inflight_split = vq->inflight_split;
988         }
989
990         vring->resubmit_inflight = vq->resubmit_inflight;
991
992         return 0;
993 }
994
995 int
996 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
997                                   uint16_t idx)
998 {
999         struct vhost_virtqueue *vq;
1000         struct virtio_net *dev;
1001
1002         dev = get_device(vid);
1003         if (unlikely(!dev))
1004                 return -1;
1005
1006         if (unlikely(!(dev->protocol_features &
1007             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1008                 return 0;
1009
1010         if (unlikely(vq_is_packed(dev)))
1011                 return -1;
1012
1013         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1014                 return -1;
1015
1016         vq = dev->virtqueue[vring_idx];
1017         if (unlikely(!vq))
1018                 return -1;
1019
1020         if (unlikely(!vq->inflight_split))
1021                 return -1;
1022
1023         if (unlikely(idx >= vq->size))
1024                 return -1;
1025
1026         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1027         vq->inflight_split->desc[idx].inflight = 1;
1028         return 0;
1029 }
1030
1031 int
1032 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1033                                    uint16_t head, uint16_t last,
1034                                    uint16_t *inflight_entry)
1035 {
1036         struct rte_vhost_inflight_info_packed *inflight_info;
1037         struct virtio_net *dev;
1038         struct vhost_virtqueue *vq;
1039         struct vring_packed_desc *desc;
1040         uint16_t old_free_head, free_head;
1041
1042         dev = get_device(vid);
1043         if (unlikely(!dev))
1044                 return -1;
1045
1046         if (unlikely(!(dev->protocol_features &
1047             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1048                 return 0;
1049
1050         if (unlikely(!vq_is_packed(dev)))
1051                 return -1;
1052
1053         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1054                 return -1;
1055
1056         vq = dev->virtqueue[vring_idx];
1057         if (unlikely(!vq))
1058                 return -1;
1059
1060         inflight_info = vq->inflight_packed;
1061         if (unlikely(!inflight_info))
1062                 return -1;
1063
1064         if (unlikely(head >= vq->size))
1065                 return -1;
1066
1067         desc = vq->desc_packed;
1068         old_free_head = inflight_info->old_free_head;
1069         if (unlikely(old_free_head >= vq->size))
1070                 return -1;
1071
1072         free_head = old_free_head;
1073
1074         /* init header descriptor */
1075         inflight_info->desc[old_free_head].num = 0;
1076         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1077         inflight_info->desc[old_free_head].inflight = 1;
1078
1079         /* save desc entry in flight entry */
1080         while (head != ((last + 1) % vq->size)) {
1081                 inflight_info->desc[old_free_head].num++;
1082                 inflight_info->desc[free_head].addr = desc[head].addr;
1083                 inflight_info->desc[free_head].len = desc[head].len;
1084                 inflight_info->desc[free_head].flags = desc[head].flags;
1085                 inflight_info->desc[free_head].id = desc[head].id;
1086
1087                 inflight_info->desc[old_free_head].last = free_head;
1088                 free_head = inflight_info->desc[free_head].next;
1089                 inflight_info->free_head = free_head;
1090                 head = (head + 1) % vq->size;
1091         }
1092
1093         inflight_info->old_free_head = free_head;
1094         *inflight_entry = old_free_head;
1095
1096         return 0;
1097 }
1098
1099 int
1100 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1101                                   uint16_t last_used_idx, uint16_t idx)
1102 {
1103         struct virtio_net *dev;
1104         struct vhost_virtqueue *vq;
1105
1106         dev = get_device(vid);
1107         if (unlikely(!dev))
1108                 return -1;
1109
1110         if (unlikely(!(dev->protocol_features &
1111             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1112                 return 0;
1113
1114         if (unlikely(vq_is_packed(dev)))
1115                 return -1;
1116
1117         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1118                 return -1;
1119
1120         vq = dev->virtqueue[vring_idx];
1121         if (unlikely(!vq))
1122                 return -1;
1123
1124         if (unlikely(!vq->inflight_split))
1125                 return -1;
1126
1127         if (unlikely(idx >= vq->size))
1128                 return -1;
1129
1130         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1131
1132         vq->inflight_split->desc[idx].inflight = 0;
1133
1134         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1135
1136         vq->inflight_split->used_idx = last_used_idx;
1137         return 0;
1138 }
1139
1140 int
1141 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1142                                    uint16_t head)
1143 {
1144         struct rte_vhost_inflight_info_packed *inflight_info;
1145         struct virtio_net *dev;
1146         struct vhost_virtqueue *vq;
1147
1148         dev = get_device(vid);
1149         if (unlikely(!dev))
1150                 return -1;
1151
1152         if (unlikely(!(dev->protocol_features &
1153             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1154                 return 0;
1155
1156         if (unlikely(!vq_is_packed(dev)))
1157                 return -1;
1158
1159         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1160                 return -1;
1161
1162         vq = dev->virtqueue[vring_idx];
1163         if (unlikely(!vq))
1164                 return -1;
1165
1166         inflight_info = vq->inflight_packed;
1167         if (unlikely(!inflight_info))
1168                 return -1;
1169
1170         if (unlikely(head >= vq->size))
1171                 return -1;
1172
1173         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1174
1175         inflight_info->desc[head].inflight = 0;
1176
1177         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1178
1179         inflight_info->old_free_head = inflight_info->free_head;
1180         inflight_info->old_used_idx = inflight_info->used_idx;
1181         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1182
1183         return 0;
1184 }
1185
1186 int
1187 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1188                                      uint16_t idx)
1189 {
1190         struct virtio_net *dev;
1191         struct vhost_virtqueue *vq;
1192
1193         dev = get_device(vid);
1194         if (unlikely(!dev))
1195                 return -1;
1196
1197         if (unlikely(!(dev->protocol_features &
1198             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1199                 return 0;
1200
1201         if (unlikely(vq_is_packed(dev)))
1202                 return -1;
1203
1204         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1205                 return -1;
1206
1207         vq = dev->virtqueue[vring_idx];
1208         if (unlikely(!vq))
1209                 return -1;
1210
1211         if (unlikely(!vq->inflight_split))
1212                 return -1;
1213
1214         vq->inflight_split->last_inflight_io = idx;
1215         return 0;
1216 }
1217
1218 int
1219 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1220                                       uint16_t head)
1221 {
1222         struct rte_vhost_inflight_info_packed *inflight_info;
1223         struct virtio_net *dev;
1224         struct vhost_virtqueue *vq;
1225         uint16_t last;
1226
1227         dev = get_device(vid);
1228         if (unlikely(!dev))
1229                 return -1;
1230
1231         if (unlikely(!(dev->protocol_features &
1232             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1233                 return 0;
1234
1235         if (unlikely(!vq_is_packed(dev)))
1236                 return -1;
1237
1238         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1239                 return -1;
1240
1241         vq = dev->virtqueue[vring_idx];
1242         if (unlikely(!vq))
1243                 return -1;
1244
1245         inflight_info = vq->inflight_packed;
1246         if (unlikely(!inflight_info))
1247                 return -1;
1248
1249         if (unlikely(head >= vq->size))
1250                 return -1;
1251
1252         last = inflight_info->desc[head].last;
1253         if (unlikely(last >= vq->size))
1254                 return -1;
1255
1256         inflight_info->desc[last].next = inflight_info->free_head;
1257         inflight_info->free_head = head;
1258         inflight_info->used_idx += inflight_info->desc[head].num;
1259         if (inflight_info->used_idx >= inflight_info->desc_num) {
1260                 inflight_info->used_idx -= inflight_info->desc_num;
1261                 inflight_info->used_wrap_counter =
1262                         !inflight_info->used_wrap_counter;
1263         }
1264
1265         return 0;
1266 }
1267
1268 int
1269 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1270 {
1271         struct virtio_net *dev;
1272         struct vhost_virtqueue *vq;
1273
1274         dev = get_device(vid);
1275         if (!dev)
1276                 return -1;
1277
1278         if (vring_idx >= VHOST_MAX_VRING)
1279                 return -1;
1280
1281         vq = dev->virtqueue[vring_idx];
1282         if (!vq)
1283                 return -1;
1284
1285         if (vq_is_packed(dev))
1286                 vhost_vring_call_packed(dev, vq);
1287         else
1288                 vhost_vring_call_split(dev, vq);
1289
1290         return 0;
1291 }
1292
1293 uint16_t
1294 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1295 {
1296         struct virtio_net *dev;
1297         struct vhost_virtqueue *vq;
1298         uint16_t ret = 0;
1299
1300         dev = get_device(vid);
1301         if (!dev)
1302                 return 0;
1303
1304         if (queue_id >= VHOST_MAX_VRING)
1305                 return 0;
1306
1307         vq = dev->virtqueue[queue_id];
1308         if (!vq)
1309                 return 0;
1310
1311         rte_spinlock_lock(&vq->access_lock);
1312
1313         if (unlikely(!vq->enabled || vq->avail == NULL))
1314                 goto out;
1315
1316         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1317
1318 out:
1319         rte_spinlock_unlock(&vq->access_lock);
1320         return ret;
1321 }
1322
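/*
 * Guest notification control for split rings: without EVENT_IDX the
 * VRING_USED_F_NO_NOTIFY flag is toggled; with EVENT_IDX, enabling
 * notifications updates the avail event index to the current
 * last_avail_idx (disabling is left to the event index mechanism).
 */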
1323 static inline int
1324 vhost_enable_notify_split(struct virtio_net *dev,
1325                 struct vhost_virtqueue *vq, int enable)
1326 {
1327         if (vq->used == NULL)
1328                 return -1;
1329
1330         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1331                 if (enable)
1332                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1333                 else
1334                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1335         } else {
1336                 if (enable)
1337                         vhost_avail_event(vq) = vq->last_avail_idx;
1338         }
1339         return 0;
1340 }
1341
1342 static inline int
1343 vhost_enable_notify_packed(struct virtio_net *dev,
1344                 struct vhost_virtqueue *vq, int enable)
1345 {
1346         uint16_t flags;
1347
1348         if (vq->device_event == NULL)
1349                 return -1;
1350
1351         if (!enable) {
1352                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1353                 return 0;
1354         }
1355
1356         flags = VRING_EVENT_F_ENABLE;
1357         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1358                 flags = VRING_EVENT_F_DESC;
1359                 vq->device_event->off_wrap = vq->last_avail_idx |
1360                         vq->avail_wrap_counter << 15;
1361         }
1362
1363         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1364
1365         vq->device_event->flags = flags;
1366         return 0;
1367 }
1368
1369 int
1370 vhost_enable_guest_notification(struct virtio_net *dev,
1371                 struct vhost_virtqueue *vq, int enable)
1372 {
1373         /*
1374          * If the virtqueue is not ready yet, the setting will be
1375          * applied once the virtqueue becomes ready.
1376          */
1377         if (!vq->ready)
1378                 return 0;
1379
1380         if (vq_is_packed(dev))
1381                 return vhost_enable_notify_packed(dev, vq, enable);
1382         else
1383                 return vhost_enable_notify_split(dev, vq, enable);
1384 }
1385
1386 int
1387 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1388 {
1389         struct virtio_net *dev = get_device(vid);
1390         struct vhost_virtqueue *vq;
1391         int ret;
1392
1393         if (!dev)
1394                 return -1;
1395
1396         if (queue_id >= VHOST_MAX_VRING)
1397                 return -1;
1398
1399         vq = dev->virtqueue[queue_id];
1400         if (!vq)
1401                 return -1;
1402
1403         rte_spinlock_lock(&vq->access_lock);
1404
1405         vq->notif_enable = enable;
1406         ret = vhost_enable_guest_notification(dev, vq, enable);
1407
1408         rte_spinlock_unlock(&vq->access_lock);
1409
1410         return ret;
1411 }
1412
1413 void
1414 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1415 {
1416         struct virtio_net *dev = get_device(vid);
1417
1418         if (dev == NULL)
1419                 return;
1420
1421         vhost_log_write(dev, addr, len);
1422 }
1423
1424 void
1425 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1426                          uint64_t offset, uint64_t len)
1427 {
1428         struct virtio_net *dev;
1429         struct vhost_virtqueue *vq;
1430
1431         dev = get_device(vid);
1432         if (dev == NULL)
1433                 return;
1434
1435         if (vring_idx >= VHOST_MAX_VRING)
1436                 return;
1437         vq = dev->virtqueue[vring_idx];
1438         if (!vq)
1439                 return;
1440
1441         vhost_log_used_vring(dev, vq, offset, len);
1442 }
1443
1444 uint32_t
1445 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1446 {
1447         struct virtio_net *dev;
1448         struct vhost_virtqueue *vq;
1449         uint32_t ret = 0;
1450
1451         dev = get_device(vid);
1452         if (dev == NULL)
1453                 return 0;
1454
1455         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1456                 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1457                         dev->vid, __func__, qid);
1458                 return 0;
1459         }
1460
1461         vq = dev->virtqueue[qid];
1462         if (vq == NULL)
1463                 return 0;
1464
1465         rte_spinlock_lock(&vq->access_lock);
1466
1467         if (unlikely(!vq->enabled || vq->avail == NULL))
1468                 goto out;
1469
1470         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1471
1472 out:
1473         rte_spinlock_unlock(&vq->access_lock);
1474         return ret;
1475 }
1476
1477 struct rte_vdpa_device *
1478 rte_vhost_get_vdpa_device(int vid)
1479 {
1480         struct virtio_net *dev = get_device(vid);
1481
1482         if (dev == NULL)
1483                 return NULL;
1484
1485         return dev->vdpa_dev;
1486 }
1487
1488 int rte_vhost_get_log_base(int vid, uint64_t *log_base,
1489                 uint64_t *log_size)
1490 {
1491         struct virtio_net *dev = get_device(vid);
1492
1493         if (dev == NULL || log_base == NULL || log_size == NULL)
1494                 return -1;
1495
1496         *log_base = dev->log_base;
1497         *log_size = dev->log_size;
1498
1499         return 0;
1500 }
1501
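/*
 * For packed rings, bit 15 of each reported index carries the
 * corresponding wrap counter and the low 15 bits hold the ring index;
 * split rings report the indexes as-is.
 */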
1502 int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1503                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1504 {
1505         struct vhost_virtqueue *vq;
1506         struct virtio_net *dev = get_device(vid);
1507
1508         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1509                 return -1;
1510
1511         if (queue_id >= VHOST_MAX_VRING)
1512                 return -1;
1513
1514         vq = dev->virtqueue[queue_id];
1515         if (!vq)
1516                 return -1;
1517
1518         if (vq_is_packed(dev)) {
1519                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1520                                   vq->last_avail_idx;
1521                 *last_used_idx = (vq->used_wrap_counter << 15) |
1522                                  vq->last_used_idx;
1523         } else {
1524                 *last_avail_idx = vq->last_avail_idx;
1525                 *last_used_idx = vq->last_used_idx;
1526         }
1527
1528         return 0;
1529 }
1530
1531 int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1532                 uint16_t last_avail_idx, uint16_t last_used_idx)
1533 {
1534         struct vhost_virtqueue *vq;
1535         struct virtio_net *dev = get_device(vid);
1536
1537         if (!dev)
1538                 return -1;
1539
1540         if (queue_id >= VHOST_MAX_VRING)
1541                 return -1;
1542
1543         vq = dev->virtqueue[queue_id];
1544         if (!vq)
1545                 return -1;
1546
1547         if (vq_is_packed(dev)) {
1548                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1549                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1550                 vq->last_used_idx = last_used_idx & 0x7fff;
1551                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1552         } else {
1553                 vq->last_avail_idx = last_avail_idx;
1554                 vq->last_used_idx = last_used_idx;
1555         }
1556
1557         return 0;
1558 }
1559
1560 int
1561 rte_vhost_get_vring_base_from_inflight(int vid,
1562                                        uint16_t queue_id,
1563                                        uint16_t *last_avail_idx,
1564                                        uint16_t *last_used_idx)
1565 {
1566         struct rte_vhost_inflight_info_packed *inflight_info;
1567         struct vhost_virtqueue *vq;
1568         struct virtio_net *dev = get_device(vid);
1569
1570         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1571                 return -1;
1572
1573         if (queue_id >= VHOST_MAX_VRING)
1574                 return -1;
1575
1576         vq = dev->virtqueue[queue_id];
1577         if (!vq)
1578                 return -1;
1579
1580         if (!vq_is_packed(dev))
1581                 return -1;
1582
1583         inflight_info = vq->inflight_packed;
1584         if (!inflight_info)
1585                 return -1;
1586
1587         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1588                           inflight_info->old_used_idx;
1589         *last_used_idx = *last_avail_idx;
1590
1591         return 0;
1592 }
1593
1594 int rte_vhost_extern_callback_register(int vid,
1595                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1596 {
1597         struct virtio_net *dev = get_device(vid);
1598
1599         if (dev == NULL || ops == NULL)
1600                 return -1;
1601
1602         dev->extern_ops = *ops;
1603         dev->extern_data = ctx;
1604         return 0;
1605 }
1606
1607 int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1608                                         uint32_t features,
1609                                         struct rte_vhost_async_channel_ops *ops)
1610 {
1611         struct vhost_virtqueue *vq;
1612         struct virtio_net *dev = get_device(vid);
1613         struct rte_vhost_async_features f;
1614         int node;
1615
1616         if (dev == NULL || ops == NULL)
1617                 return -1;
1618
1619         f.intval = features;
1620
1621         if (queue_id >= VHOST_MAX_VRING)
1622                 return -1;
1623
1624         vq = dev->virtqueue[queue_id];
1625
1626         if (unlikely(vq == NULL || !dev->async_copy))
1627                 return -1;
1628
1629         /* packed queue is not supported */
1630         if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
1631                 VHOST_LOG_CONFIG(ERR,
1632                         "async copy is not supported on packed queue or non-inorder mode "
1633                         "(vid %d, qid: %d)\n", vid, queue_id);
1634                 return -1;
1635         }
1636
1637         if (unlikely(ops->check_completed_copies == NULL ||
1638                 ops->transfer_data == NULL))
1639                 return -1;
1640
1641         rte_spinlock_lock(&vq->access_lock);
1642
1643         if (unlikely(vq->async_registered)) {
1644                 VHOST_LOG_CONFIG(ERR,
1645                         "async register failed: channel already registered "
1646                         "(vid %d, qid: %d)\n", vid, queue_id);
1647                 goto reg_out;
1648         }
1649
1650 #ifdef RTE_LIBRTE_VHOST_NUMA
1651         if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
1652                 VHOST_LOG_CONFIG(ERR,
1653                         "unable to get NUMA information in async register, "
1654                         "allocating async buffer memory on the caller thread's node\n");
1655                 node = SOCKET_ID_ANY;
1656         }
1657 #else
1658         node = SOCKET_ID_ANY;
1659 #endif
1660
1661         vq->async_pkts_info = rte_malloc_socket(NULL,
1662                         vq->size * sizeof(struct async_inflight_info),
1663                         RTE_CACHE_LINE_SIZE, node);
1664         vq->it_pool = rte_malloc_socket(NULL,
1665                         VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
1666                         RTE_CACHE_LINE_SIZE, node);
1667         vq->vec_pool = rte_malloc_socket(NULL,
1668                         VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
1669                         RTE_CACHE_LINE_SIZE, node);
1670         vq->async_descs_split = rte_malloc_socket(NULL,
1671                         vq->size * sizeof(struct vring_used_elem),
1672                         RTE_CACHE_LINE_SIZE, node);
1673         if (!vq->async_descs_split || !vq->async_pkts_info ||
1674                 !vq->it_pool || !vq->vec_pool) {
1675                 vhost_free_async_mem(vq);
1676                 VHOST_LOG_CONFIG(ERR,
1677                                 "async register failed: cannot allocate memory for vq data "
1678                                 "(vid %d, qid: %d)\n", vid, queue_id);
1679                 goto reg_out;
1680         }
1681
1682         vq->async_ops.check_completed_copies = ops->check_completed_copies;
1683         vq->async_ops.transfer_data = ops->transfer_data;
1684
1685         vq->async_inorder = f.async_inorder;
1686         vq->async_threshold = f.async_threshold;
1687
1688         vq->async_registered = true;
1689
1690 reg_out:
1691         rte_spinlock_unlock(&vq->access_lock);
1692
1693         return 0;
1694 }
1695
1696 int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1697 {
1698         struct vhost_virtqueue *vq;
1699         struct virtio_net *dev = get_device(vid);
1700         int ret = -1;
1701
1702         if (dev == NULL)
1703                 return ret;
1704
1705         if (queue_id >= VHOST_MAX_VRING)
1706                 return ret;
1707
1708         vq = dev->virtqueue[queue_id];
1709
1710         if (vq == NULL)
1711                 return ret;
1712
1713         ret = 0;
1714
1715         if (!vq->async_registered)
1716                 return ret;
1717
1718         if (!rte_spinlock_trylock(&vq->access_lock)) {
1719                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1720                         "virtqueue busy.\n");
1721                 return -1;
1722         }
1723
1724         if (vq->async_pkts_inflight_n) {
1725                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1726                         "async inflight packets must be completed before unregistration.\n");
1727                 ret = -1;
1728                 goto out;
1729         }
1730
1731         vhost_free_async_mem(vq);
1732
1733         vq->async_ops.transfer_data = NULL;
1734         vq->async_ops.check_completed_copies = NULL;
1735         vq->async_registered = false;
1736
1737 out:
1738         rte_spinlock_unlock(&vq->access_lock);
1739
1740         return ret;
1741 }
1742
1743 RTE_LOG_REGISTER(vhost_config_log_level, lib.vhost.config, INFO);
1744 RTE_LOG_REGISTER(vhost_data_log_level, lib.vhost.data, WARNING);