dpdk.git: lib/vhost/vhost.c @ c96f6335c8576c89ebba88dab268212de38754be
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_CONFIG(ERR,
62                                 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63                                 iova);
64                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65                 }
66
67                 vhost_user_iotlb_rd_lock(vq);
68         }
69
70         return 0;
71 }
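/*
 * Illustrative sketch, not part of the original file: the usual caller
 * pattern for the translation helper above (normally reached through the
 * vhost_iova_to_vva() wrapper). In the non-IOMMU path a request may be
 * satisfied for fewer bytes than asked, so callers translate and copy chunk
 * by chunk, and bail out when nothing is mapped, in which case an IOTLB miss
 * has already been requested and the operation is retried later. Names with
 * the example_ prefix are hypothetical.
 *
 *	static int
 *	example_copy_from_guest(struct virtio_net *dev,
 *			struct vhost_virtqueue *vq,
 *			void *dst, uint64_t iova, uint64_t len)
 *	{
 *		while (len) {
 *			uint64_t chunk = len;
 *			uint64_t src = vhost_iova_to_vva(dev, vq, iova,
 *						&chunk, VHOST_ACCESS_RO);
 *
 *			if (unlikely(!src || !chunk))
 *				return -1;	// not mapped yet, retry later
 *
 *			rte_memcpy(dst, (void *)(uintptr_t)src, chunk);
 *			dst = RTE_PTR_ADD(dst, chunk);
 *			iova += chunk;
 *			len -= chunk;
 *		}
 *
 *		return 0;
 *	}
 */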
72
73 #define VHOST_LOG_PAGE  4096
74
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82         /*
83          * __sync_ built-ins are deprecated, but __atomic_ ones
84          * generate sub-optimal code in older GCC versions.
85          */
86         __sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95         vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
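/*
 * Worked example, added for illustration only: with the 4 KiB VHOST_LOG_PAGE
 * above, a write to guest physical address 0x12345 dirties page
 * 0x12345 / 4096 = 0x12, so vhost_log_page() sets bit 0x12 % 8 = 2 of byte
 * 0x12 / 8 = 2 in the dirty log bitmap:
 *
 *	uint64_t page = 0x12345 / VHOST_LOG_PAGE;	// 0x12
 *	vhost_set_bit(page % 8, &log_base[page / 8]);	// bit 2 of log_base[2]
 */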
97
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101         uint64_t page;
102
103         if (unlikely(!dev->log_base || !len))
104                 return;
105
106         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107                 return;
108
109         /* To make sure guest memory updates are committed before logging */
110         rte_atomic_thread_fence(__ATOMIC_RELEASE);
111
112         page = addr / VHOST_LOG_PAGE;
113         while (page * VHOST_LOG_PAGE < addr + len) {
114                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115                 page += 1;
116         }
117 }
118
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121                              uint64_t iova, uint64_t len)
122 {
123         uint64_t hva, gpa, map_len;
124         map_len = len;
125
126         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127         if (map_len != len) {
128                 VHOST_LOG_DATA(ERR,
129                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130                         iova);
131                 return;
132         }
133
134         gpa = hva_to_gpa(dev, hva, len);
135         if (gpa)
136                 __vhost_log_write(dev, gpa, len);
137 }
138
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142         unsigned long *log_base;
143         int i;
144
145         if (unlikely(!dev->log_base))
146                 return;
147
148         /* No cache, nothing to sync */
149         if (unlikely(!vq->log_cache))
150                 return;
151
152         rte_atomic_thread_fence(__ATOMIC_RELEASE);
153
154         log_base = (unsigned long *)(uintptr_t)dev->log_base;
155
156         for (i = 0; i < vq->log_cache_nb_elem; i++) {
157                 struct log_cache_entry *elem = vq->log_cache + i;
158
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160                 /*
161                  * '__sync' builtins are deprecated, but '__atomic' ones
162                  * generate sub-optimal code in older GCC versions.
163                  */
164                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166                 __atomic_fetch_or(log_base + elem->offset, elem->val,
167                                 __ATOMIC_RELAXED);
168 #endif
169         }
170
171         rte_atomic_thread_fence(__ATOMIC_RELEASE);
172
173         vq->log_cache_nb_elem = 0;
174 }
175
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178                         uint64_t page)
179 {
180         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181         uint32_t offset = page / (sizeof(unsigned long) << 3);
182         int i;
183
184         if (unlikely(!vq->log_cache)) {
185                 /* No logging cache allocated, write dirty log map directly */
186                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
187                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188
189                 return;
190         }
191
192         for (i = 0; i < vq->log_cache_nb_elem; i++) {
193                 struct log_cache_entry *elem = vq->log_cache + i;
194
195                 if (elem->offset == offset) {
196                         elem->val |= (1UL << bit_nr);
197                         return;
198                 }
199         }
200
201         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202                 /*
203                  * No more room for a new log cache entry,
204                  * so write the dirty log map directly.
205                  */
206                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
207                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208
209                 return;
210         }
211
212         vq->log_cache[i].offset = offset;
213         vq->log_cache[i].val = (1UL << bit_nr);
214         vq->log_cache_nb_elem++;
215 }
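/*
 * Worked example, added for illustration only: on a 64-bit build,
 * sizeof(unsigned long) << 3 is 64, so dirty page 130 is cached as
 * offset 130 / 64 = 2 with bit 130 % 64 = 2 set in val. Later writes to any
 * page in the range 128..191 merge into the same entry, and
 * __vhost_log_cache_sync() eventually ORs the accumulated value into the
 * shared log with a single atomic operation per entry:
 *
 *	struct log_cache_entry e = {
 *		.offset = 130 / 64,		// 2
 *		.val = 1UL << (130 % 64),	// bit 2
 *	};
 *	// sync: log_base[e.offset] |= e.val, atomically
 */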
216
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219                         uint64_t addr, uint64_t len)
220 {
221         uint64_t page;
222
223         if (unlikely(!dev->log_base || !len))
224                 return;
225
226         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227                 return;
228
229         page = addr / VHOST_LOG_PAGE;
230         while (page * VHOST_LOG_PAGE < addr + len) {
231                 vhost_log_cache_page(dev, vq, page);
232                 page += 1;
233         }
234 }
235
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238                              uint64_t iova, uint64_t len)
239 {
240         uint64_t hva, gpa, map_len;
241         map_len = len;
242
243         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244         if (map_len != len) {
245                 VHOST_LOG_DATA(ERR,
246                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247                         iova);
248                 return;
249         }
250
251         gpa = hva_to_gpa(dev, hva, len);
252         if (gpa)
253                 __vhost_log_cache_write(dev, vq, gpa, len);
254 }
255
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258                 uint64_t desc_addr, uint64_t desc_len)
259 {
260         void *idesc;
261         uint64_t src, dst;
262         uint64_t len, remain = desc_len;
263
264         idesc = rte_malloc(__func__, desc_len, 0);
265         if (unlikely(!idesc))
266                 return NULL;
267
268         dst = (uint64_t)(uintptr_t)idesc;
269
270         while (remain) {
271                 len = remain;
272                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273                                 VHOST_ACCESS_RO);
274                 if (unlikely(!src || !len)) {
275                         rte_free(idesc);
276                         return NULL;
277                 }
278
279                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280
281                 remain -= len;
282                 dst += len;
283                 desc_addr += len;
284         }
285
286         return idesc;
287 }
288
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292         if ((vq->callfd >= 0) && (destroy != 0))
293                 close(vq->callfd);
294         if (vq->kickfd >= 0)
295                 close(vq->kickfd);
296 }
297
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301         if (!(dev->protocol_features &
302             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303                 return;
304
305         if (vq_is_packed(dev)) {
306                 if (vq->inflight_packed)
307                         vq->inflight_packed = NULL;
308         } else {
309                 if (vq->inflight_split)
310                         vq->inflight_split = NULL;
311         }
312
313         if (vq->resubmit_inflight) {
314                 if (vq->resubmit_inflight->resubmit_list) {
315                         free(vq->resubmit_inflight->resubmit_list);
316                         vq->resubmit_inflight->resubmit_list = NULL;
317                 }
318                 free(vq->resubmit_inflight);
319                 vq->resubmit_inflight = NULL;
320         }
321 }
322
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330         uint32_t i;
331
332         vhost_backend_cleanup(dev);
333
334         for (i = 0; i < dev->nr_vring; i++) {
335                 cleanup_vq(dev->virtqueue[i], destroy);
336                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
337         }
338 }
339
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343         rte_free(vq->async_pkts_info);
344
345         rte_free(vq->async_buffers_packed);
346         vq->async_buffers_packed = NULL;
347         rte_free(vq->async_descs_split);
348         vq->async_descs_split = NULL;
349
350         rte_free(vq->it_pool);
351         rte_free(vq->vec_pool);
352
353         vq->async_pkts_info = NULL;
354         vq->it_pool = NULL;
355         vq->vec_pool = NULL;
356 }
357
358 void
359 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
360 {
361         if (vq_is_packed(dev))
362                 rte_free(vq->shadow_used_packed);
363         else
364                 rte_free(vq->shadow_used_split);
365
366         vhost_free_async_mem(vq);
367         rte_free(vq->batch_copy_elems);
368         rte_mempool_free(vq->iotlb_pool);
369         rte_free(vq->log_cache);
370         rte_free(vq);
371 }
372
373 /*
374  * Release virtqueues and device memory.
375  */
376 static void
377 free_device(struct virtio_net *dev)
378 {
379         uint32_t i;
380
381         for (i = 0; i < dev->nr_vring; i++)
382                 free_vq(dev, dev->virtqueue[i]);
383
384         rte_free(dev);
385 }
386
387 static __rte_always_inline int
388 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
389 {
390         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
391                 return 0;
392
393         vq->log_guest_addr = translate_log_addr(dev, vq,
394                                                 vq->ring_addrs.log_guest_addr);
395         if (vq->log_guest_addr == 0)
396                 return -1;
397
398         return 0;
399 }
400
401 /*
402  * Converts vring log address to GPA
403  * If IOMMU is enabled, the log address is an IOVA.
404  * If IOMMU is not enabled, the log address is already a GPA.
405  *
406  * Caller should have iotlb_lock read-locked
407  */
408 uint64_t
409 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
410                 uint64_t log_addr)
411 {
412         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
413                 const uint64_t exp_size = sizeof(uint64_t);
414                 uint64_t hva, gpa;
415                 uint64_t size = exp_size;
416
417                 hva = vhost_iova_to_vva(dev, vq, log_addr,
418                                         &size, VHOST_ACCESS_RW);
419
420                 if (size != exp_size)
421                         return 0;
422
423                 gpa = hva_to_gpa(dev, hva, exp_size);
424                 if (!gpa) {
425                         VHOST_LOG_CONFIG(ERR,
426                                 "VQ: Failed to find GPA for log_addr: 0x%"
427                                 PRIx64 " hva: 0x%" PRIx64 "\n",
428                                 log_addr, hva);
429                         return 0;
430                 }
431                 return gpa;
432
433         } else
434                 return log_addr;
435 }
436
437 /* Caller should have iotlb_lock read-locked */
438 static int
439 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
440 {
441         uint64_t req_size, size;
442
443         req_size = sizeof(struct vring_desc) * vq->size;
444         size = req_size;
445         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
446                                                 vq->ring_addrs.desc_user_addr,
447                                                 &size, VHOST_ACCESS_RW);
448         if (!vq->desc || size != req_size)
449                 return -1;
450
451         req_size = sizeof(struct vring_avail);
452         req_size += sizeof(uint16_t) * vq->size;
453         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
454                 req_size += sizeof(uint16_t);
455         size = req_size;
456         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
457                                                 vq->ring_addrs.avail_user_addr,
458                                                 &size, VHOST_ACCESS_RW);
459         if (!vq->avail || size != req_size)
460                 return -1;
461
462         req_size = sizeof(struct vring_used);
463         req_size += sizeof(struct vring_used_elem) * vq->size;
464         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
465                 req_size += sizeof(uint16_t);
466         size = req_size;
467         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
468                                                 vq->ring_addrs.used_user_addr,
469                                                 &size, VHOST_ACCESS_RW);
470         if (!vq->used || size != req_size)
471                 return -1;
472
473         return 0;
474 }
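/*
 * Worked example, added for illustration only and assuming the standard
 * virtio layouts (16-byte descriptors, 4-byte avail/used headers, 8-byte
 * used elements): for a split ring with vq->size = 256 and
 * VIRTIO_RING_F_EVENT_IDX negotiated, the sizes requested above are:
 *
 *	desc  : 16 * 256           = 4096 bytes
 *	avail : 4 + 2 * 256 + 2    =  518 bytes
 *	used  : 4 + 8 * 256 + 2    = 2054 bytes
 *
 * Each area must be covered by a single contiguous translation of exactly
 * that size, otherwise the ring is left untranslated and -1 is returned.
 */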
475
476 /* Caller should have iotlb_lock read-locked */
477 static int
478 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
479 {
480         uint64_t req_size, size;
481
482         req_size = sizeof(struct vring_packed_desc) * vq->size;
483         size = req_size;
484         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
485                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
486                                 &size, VHOST_ACCESS_RW);
487         if (!vq->desc_packed || size != req_size)
488                 return -1;
489
490         req_size = sizeof(struct vring_packed_desc_event);
491         size = req_size;
492         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
493                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
494                                 &size, VHOST_ACCESS_RW);
495         if (!vq->driver_event || size != req_size)
496                 return -1;
497
498         req_size = sizeof(struct vring_packed_desc_event);
499         size = req_size;
500         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
501                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
502                                 &size, VHOST_ACCESS_RW);
503         if (!vq->device_event || size != req_size)
504                 return -1;
505
506         return 0;
507 }
508
509 int
510 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
511 {
512
513         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
514                 return -1;
515
516         if (vq_is_packed(dev)) {
517                 if (vring_translate_packed(dev, vq) < 0)
518                         return -1;
519         } else {
520                 if (vring_translate_split(dev, vq) < 0)
521                         return -1;
522         }
523
524         if (log_translate(dev, vq) < 0)
525                 return -1;
526
527         vq->access_ok = true;
528
529         return 0;
530 }
531
532 void
533 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
534 {
535         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
536                 vhost_user_iotlb_wr_lock(vq);
537
538         vq->access_ok = false;
539         vq->desc = NULL;
540         vq->avail = NULL;
541         vq->used = NULL;
542         vq->log_guest_addr = 0;
543
544         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
545                 vhost_user_iotlb_wr_unlock(vq);
546 }
547
548 static void
549 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
550 {
551         struct vhost_virtqueue *vq;
552
553         if (vring_idx >= VHOST_MAX_VRING) {
554                 VHOST_LOG_CONFIG(ERR,
555                                 "Failed to init vring, out of bounds (%d)\n",
556                                 vring_idx);
557                 return;
558         }
559
560         vq = dev->virtqueue[vring_idx];
561         if (!vq) {
562                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
563                                 vring_idx);
564                 return;
565         }
566
567         memset(vq, 0, sizeof(struct vhost_virtqueue));
568
569         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
570         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
571         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
572
573         vhost_user_iotlb_init(dev, vring_idx);
574 }
575
576 static void
577 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
578 {
579         struct vhost_virtqueue *vq;
580         int callfd;
581
582         if (vring_idx >= VHOST_MAX_VRING) {
583                 VHOST_LOG_CONFIG(ERR,
584                                 "Failed to init vring, out of bounds (%d)\n",
585                                 vring_idx);
586                 return;
587         }
588
589         vq = dev->virtqueue[vring_idx];
590         if (!vq) {
591                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
592                                 vring_idx);
593                 return;
594         }
595
596         callfd = vq->callfd;
597         init_vring_queue(dev, vring_idx);
598         vq->callfd = callfd;
599 }
600
601 int
602 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
603 {
604         struct vhost_virtqueue *vq;
605         uint32_t i;
606
607         /* Also allocate holes, if any, up to the requested vring index. */
608         for (i = 0; i <= vring_idx; i++) {
609                 if (dev->virtqueue[i])
610                         continue;
611
612                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
613                 if (vq == NULL) {
614                         VHOST_LOG_CONFIG(ERR,
615                                 "Failed to allocate memory for vring:%u.\n", i);
616                         return -1;
617                 }
618
619                 dev->virtqueue[i] = vq;
620                 init_vring_queue(dev, i);
621                 rte_spinlock_init(&vq->access_lock);
622                 vq->avail_wrap_counter = 1;
623                 vq->used_wrap_counter = 1;
624                 vq->signalled_used_valid = false;
625         }
626
627         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
628
629         return 0;
630 }
631
632 /*
633  * Reset some variables in the device structure, while keeping a few
634  * others untouched, such as vid, ifname and nr_vring: they should
635  * remain the same unless the device is removed.
636  */
637 void
638 reset_device(struct virtio_net *dev)
639 {
640         uint32_t i;
641
642         dev->features = 0;
643         dev->protocol_features = 0;
644         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
645
646         for (i = 0; i < dev->nr_vring; i++)
647                 reset_vring_queue(dev, i);
648 }
649
650 /*
651  * Invoked when a new vhost-user connection is established (i.e. when
652  * a new virtio device is being attached).
653  */
654 int
655 vhost_new_device(void)
656 {
657         struct virtio_net *dev;
658         int i;
659
660         pthread_mutex_lock(&vhost_dev_lock);
661         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
662                 if (vhost_devices[i] == NULL)
663                         break;
664         }
665
666         if (i == MAX_VHOST_DEVICE) {
667                 VHOST_LOG_CONFIG(ERR,
668                         "Failed to find a free slot for new device.\n");
669                 pthread_mutex_unlock(&vhost_dev_lock);
670                 return -1;
671         }
672
673         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
674         if (dev == NULL) {
675                 VHOST_LOG_CONFIG(ERR,
676                         "Failed to allocate memory for new dev.\n");
677                 pthread_mutex_unlock(&vhost_dev_lock);
678                 return -1;
679         }
680
681         vhost_devices[i] = dev;
682         pthread_mutex_unlock(&vhost_dev_lock);
683
684         dev->vid = i;
685         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
686         dev->slave_req_fd = -1;
687         dev->postcopy_ufd = -1;
688         rte_spinlock_init(&dev->slave_req_lock);
689
690         return i;
691 }
692
693 void
694 vhost_destroy_device_notify(struct virtio_net *dev)
695 {
696         struct rte_vdpa_device *vdpa_dev;
697
698         if (dev->flags & VIRTIO_DEV_RUNNING) {
699                 vdpa_dev = dev->vdpa_dev;
700                 if (vdpa_dev)
701                         vdpa_dev->ops->dev_close(dev->vid);
702                 dev->flags &= ~VIRTIO_DEV_RUNNING;
703                 dev->notify_ops->destroy_device(dev->vid);
704         }
705 }
706
707 /*
708  * Invoked when the vhost-user connection is broken (i.e. when
709  * the virtio device is being detached).
710  */
711 void
712 vhost_destroy_device(int vid)
713 {
714         struct virtio_net *dev = get_device(vid);
715
716         if (dev == NULL)
717                 return;
718
719         vhost_destroy_device_notify(dev);
720
721         cleanup_device(dev, 1);
722         free_device(dev);
723
724         vhost_devices[vid] = NULL;
725 }
726
727 void
728 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
729 {
730         struct virtio_net *dev = get_device(vid);
731
732         if (dev == NULL)
733                 return;
734
735         dev->vdpa_dev = vdpa_dev;
736 }
737
738 void
739 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
740 {
741         struct virtio_net *dev;
742         unsigned int len;
743
744         dev = get_device(vid);
745         if (dev == NULL)
746                 return;
747
748         len = if_len > sizeof(dev->ifname) ?
749                 sizeof(dev->ifname) : if_len;
750
751         strncpy(dev->ifname, if_name, len);
752         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
753 }
754
755 void
756 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
757 {
758         struct virtio_net *dev = get_device(vid);
759
760         if (dev == NULL)
761                 return;
762
763         if (enable)
764                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
765         else
766                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
767         if (!compliant_ol_flags)
768                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
769         else
770                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
771 }
772
773 void
774 vhost_enable_extbuf(int vid)
775 {
776         struct virtio_net *dev = get_device(vid);
777
778         if (dev == NULL)
779                 return;
780
781         dev->extbuf = 1;
782 }
783
784 void
785 vhost_enable_linearbuf(int vid)
786 {
787         struct virtio_net *dev = get_device(vid);
788
789         if (dev == NULL)
790                 return;
791
792         dev->linearbuf = 1;
793 }
794
795 int
796 rte_vhost_get_mtu(int vid, uint16_t *mtu)
797 {
798         struct virtio_net *dev = get_device(vid);
799
800         if (dev == NULL || mtu == NULL)
801                 return -ENODEV;
802
803         if (!(dev->flags & VIRTIO_DEV_READY))
804                 return -EAGAIN;
805
806         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
807                 return -ENOTSUP;
808
809         *mtu = dev->mtu;
810
811         return 0;
812 }
813
814 int
815 rte_vhost_get_numa_node(int vid)
816 {
817 #ifdef RTE_LIBRTE_VHOST_NUMA
818         struct virtio_net *dev = get_device(vid);
819         int numa_node;
820         int ret;
821
822         if (dev == NULL || numa_available() != 0)
823                 return -1;
824
825         ret = get_mempolicy(&numa_node, NULL, 0, dev,
826                             MPOL_F_NODE | MPOL_F_ADDR);
827         if (ret < 0) {
828                 VHOST_LOG_CONFIG(ERR,
829                         "(%d) failed to query numa node: %s\n",
830                         vid, rte_strerror(errno));
831                 return -1;
832         }
833
834         return numa_node;
835 #else
836         RTE_SET_USED(vid);
837         return -1;
838 #endif
839 }
840
841 uint32_t
842 rte_vhost_get_queue_num(int vid)
843 {
844         struct virtio_net *dev = get_device(vid);
845
846         if (dev == NULL)
847                 return 0;
848
849         return dev->nr_vring / 2;
850 }
851
852 uint16_t
853 rte_vhost_get_vring_num(int vid)
854 {
855         struct virtio_net *dev = get_device(vid);
856
857         if (dev == NULL)
858                 return 0;
859
860         return dev->nr_vring;
861 }
862
863 int
864 rte_vhost_get_ifname(int vid, char *buf, size_t len)
865 {
866         struct virtio_net *dev = get_device(vid);
867
868         if (dev == NULL || buf == NULL)
869                 return -1;
870
871         len = RTE_MIN(len, sizeof(dev->ifname));
872
873         strncpy(buf, dev->ifname, len);
874         buf[len - 1] = '\0';
875
876         return 0;
877 }
878
879 int
880 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
881 {
882         struct virtio_net *dev;
883
884         dev = get_device(vid);
885         if (dev == NULL || features == NULL)
886                 return -1;
887
888         *features = dev->features;
889         return 0;
890 }
891
892 int
893 rte_vhost_get_negotiated_protocol_features(int vid,
894                                            uint64_t *protocol_features)
895 {
896         struct virtio_net *dev;
897
898         dev = get_device(vid);
899         if (dev == NULL || protocol_features == NULL)
900                 return -1;
901
902         *protocol_features = dev->protocol_features;
903         return 0;
904 }
905
906 int
907 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
908 {
909         struct virtio_net *dev;
910         struct rte_vhost_memory *m;
911         size_t size;
912
913         dev = get_device(vid);
914         if (dev == NULL || mem == NULL)
915                 return -1;
916
917         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
918         m = malloc(sizeof(struct rte_vhost_memory) + size);
919         if (!m)
920                 return -1;
921
922         m->nregions = dev->mem->nregions;
923         memcpy(m->regions, dev->mem->regions, size);
924         *mem = m;
925
926         return 0;
927 }
928
929 int
930 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
931                           struct rte_vhost_vring *vring)
932 {
933         struct virtio_net *dev;
934         struct vhost_virtqueue *vq;
935
936         dev = get_device(vid);
937         if (dev == NULL || vring == NULL)
938                 return -1;
939
940         if (vring_idx >= VHOST_MAX_VRING)
941                 return -1;
942
943         vq = dev->virtqueue[vring_idx];
944         if (!vq)
945                 return -1;
946
947         if (vq_is_packed(dev)) {
948                 vring->desc_packed = vq->desc_packed;
949                 vring->driver_event = vq->driver_event;
950                 vring->device_event = vq->device_event;
951         } else {
952                 vring->desc = vq->desc;
953                 vring->avail = vq->avail;
954                 vring->used = vq->used;
955         }
956         vring->log_guest_addr  = vq->log_guest_addr;
957
958         vring->callfd  = vq->callfd;
959         vring->kickfd  = vq->kickfd;
960         vring->size    = vq->size;
961
962         return 0;
963 }
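/*
 * Illustrative usage sketch, not part of the original file: an application
 * or vDPA driver can snapshot one ring's addresses and eventfds as below;
 * example_vring is a hypothetical name. For packed rings, the desc_packed,
 * driver_event and device_event fields are the ones populated instead of
 * desc, avail and used.
 *
 *	struct rte_vhost_vring example_vring;
 *
 *	if (rte_vhost_get_vhost_vring(vid, 0, &example_vring) == 0) {
 *		// example_vring.size, example_vring.kickfd/callfd and the
 *		// ring pointers filled above are now available to the caller
 *	}
 */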
964
965 int
966 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
967                                   struct rte_vhost_ring_inflight *vring)
968 {
969         struct virtio_net *dev;
970         struct vhost_virtqueue *vq;
971
972         dev = get_device(vid);
973         if (unlikely(!dev))
974                 return -1;
975
976         if (vring_idx >= VHOST_MAX_VRING)
977                 return -1;
978
979         vq = dev->virtqueue[vring_idx];
980         if (unlikely(!vq))
981                 return -1;
982
983         if (vq_is_packed(dev)) {
984                 if (unlikely(!vq->inflight_packed))
985                         return -1;
986
987                 vring->inflight_packed = vq->inflight_packed;
988         } else {
989                 if (unlikely(!vq->inflight_split))
990                         return -1;
991
992                 vring->inflight_split = vq->inflight_split;
993         }
994
995         vring->resubmit_inflight = vq->resubmit_inflight;
996
997         return 0;
998 }
999
1000 int
1001 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1002                                   uint16_t idx)
1003 {
1004         struct vhost_virtqueue *vq;
1005         struct virtio_net *dev;
1006
1007         dev = get_device(vid);
1008         if (unlikely(!dev))
1009                 return -1;
1010
1011         if (unlikely(!(dev->protocol_features &
1012             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1013                 return 0;
1014
1015         if (unlikely(vq_is_packed(dev)))
1016                 return -1;
1017
1018         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1019                 return -1;
1020
1021         vq = dev->virtqueue[vring_idx];
1022         if (unlikely(!vq))
1023                 return -1;
1024
1025         if (unlikely(!vq->inflight_split))
1026                 return -1;
1027
1028         if (unlikely(idx >= vq->size))
1029                 return -1;
1030
1031         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1032         vq->inflight_split->desc[idx].inflight = 1;
1033         return 0;
1034 }
1035
1036 int
1037 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1038                                    uint16_t head, uint16_t last,
1039                                    uint16_t *inflight_entry)
1040 {
1041         struct rte_vhost_inflight_info_packed *inflight_info;
1042         struct virtio_net *dev;
1043         struct vhost_virtqueue *vq;
1044         struct vring_packed_desc *desc;
1045         uint16_t old_free_head, free_head;
1046
1047         dev = get_device(vid);
1048         if (unlikely(!dev))
1049                 return -1;
1050
1051         if (unlikely(!(dev->protocol_features &
1052             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1053                 return 0;
1054
1055         if (unlikely(!vq_is_packed(dev)))
1056                 return -1;
1057
1058         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1059                 return -1;
1060
1061         vq = dev->virtqueue[vring_idx];
1062         if (unlikely(!vq))
1063                 return -1;
1064
1065         inflight_info = vq->inflight_packed;
1066         if (unlikely(!inflight_info))
1067                 return -1;
1068
1069         if (unlikely(head >= vq->size))
1070                 return -1;
1071
1072         desc = vq->desc_packed;
1073         old_free_head = inflight_info->old_free_head;
1074         if (unlikely(old_free_head >= vq->size))
1075                 return -1;
1076
1077         free_head = old_free_head;
1078
1079         /* init header descriptor */
1080         inflight_info->desc[old_free_head].num = 0;
1081         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1082         inflight_info->desc[old_free_head].inflight = 1;
1083
1084         /* save the descriptor entries in the inflight entry */
1085         while (head != ((last + 1) % vq->size)) {
1086                 inflight_info->desc[old_free_head].num++;
1087                 inflight_info->desc[free_head].addr = desc[head].addr;
1088                 inflight_info->desc[free_head].len = desc[head].len;
1089                 inflight_info->desc[free_head].flags = desc[head].flags;
1090                 inflight_info->desc[free_head].id = desc[head].id;
1091
1092                 inflight_info->desc[old_free_head].last = free_head;
1093                 free_head = inflight_info->desc[free_head].next;
1094                 inflight_info->free_head = free_head;
1095                 head = (head + 1) % vq->size;
1096         }
1097
1098         inflight_info->old_free_head = free_head;
1099         *inflight_entry = old_free_head;
1100
1101         return 0;
1102 }
1103
1104 int
1105 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1106                                   uint16_t last_used_idx, uint16_t idx)
1107 {
1108         struct virtio_net *dev;
1109         struct vhost_virtqueue *vq;
1110
1111         dev = get_device(vid);
1112         if (unlikely(!dev))
1113                 return -1;
1114
1115         if (unlikely(!(dev->protocol_features &
1116             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1117                 return 0;
1118
1119         if (unlikely(vq_is_packed(dev)))
1120                 return -1;
1121
1122         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1123                 return -1;
1124
1125         vq = dev->virtqueue[vring_idx];
1126         if (unlikely(!vq))
1127                 return -1;
1128
1129         if (unlikely(!vq->inflight_split))
1130                 return -1;
1131
1132         if (unlikely(idx >= vq->size))
1133                 return -1;
1134
1135         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1136
1137         vq->inflight_split->desc[idx].inflight = 0;
1138
1139         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1140
1141         vq->inflight_split->used_idx = last_used_idx;
1142         return 0;
1143 }
1144
1145 int
1146 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1147                                    uint16_t head)
1148 {
1149         struct rte_vhost_inflight_info_packed *inflight_info;
1150         struct virtio_net *dev;
1151         struct vhost_virtqueue *vq;
1152
1153         dev = get_device(vid);
1154         if (unlikely(!dev))
1155                 return -1;
1156
1157         if (unlikely(!(dev->protocol_features &
1158             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1159                 return 0;
1160
1161         if (unlikely(!vq_is_packed(dev)))
1162                 return -1;
1163
1164         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1165                 return -1;
1166
1167         vq = dev->virtqueue[vring_idx];
1168         if (unlikely(!vq))
1169                 return -1;
1170
1171         inflight_info = vq->inflight_packed;
1172         if (unlikely(!inflight_info))
1173                 return -1;
1174
1175         if (unlikely(head >= vq->size))
1176                 return -1;
1177
1178         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1179
1180         inflight_info->desc[head].inflight = 0;
1181
1182         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1183
1184         inflight_info->old_free_head = inflight_info->free_head;
1185         inflight_info->old_used_idx = inflight_info->used_idx;
1186         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1187
1188         return 0;
1189 }
1190
1191 int
1192 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1193                                      uint16_t idx)
1194 {
1195         struct virtio_net *dev;
1196         struct vhost_virtqueue *vq;
1197
1198         dev = get_device(vid);
1199         if (unlikely(!dev))
1200                 return -1;
1201
1202         if (unlikely(!(dev->protocol_features &
1203             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1204                 return 0;
1205
1206         if (unlikely(vq_is_packed(dev)))
1207                 return -1;
1208
1209         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1210                 return -1;
1211
1212         vq = dev->virtqueue[vring_idx];
1213         if (unlikely(!vq))
1214                 return -1;
1215
1216         if (unlikely(!vq->inflight_split))
1217                 return -1;
1218
1219         vq->inflight_split->last_inflight_io = idx;
1220         return 0;
1221 }
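/*
 * Illustrative sketch, not part of the original file: a rough call order for
 * split-ring inflight tracking as suggested by the three split-ring APIs
 * above, for a backend handling one descriptor chain with head index 'idx'.
 * The authoritative usage is documented with the declarations in rte_vhost.h.
 *
 *	rte_vhost_set_inflight_desc_split(vid, vring_idx, idx);
 *	// ... process the request and fill the used ring entry ...
 *	rte_vhost_set_last_inflight_io_split(vid, vring_idx, idx);
 *	// ... update used->idx so the entry becomes visible to the driver ...
 *	rte_vhost_clr_inflight_desc_split(vid, vring_idx, used_idx, idx);
 */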
1222
1223 int
1224 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1225                                       uint16_t head)
1226 {
1227         struct rte_vhost_inflight_info_packed *inflight_info;
1228         struct virtio_net *dev;
1229         struct vhost_virtqueue *vq;
1230         uint16_t last;
1231
1232         dev = get_device(vid);
1233         if (unlikely(!dev))
1234                 return -1;
1235
1236         if (unlikely(!(dev->protocol_features &
1237             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1238                 return 0;
1239
1240         if (unlikely(!vq_is_packed(dev)))
1241                 return -1;
1242
1243         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1244                 return -1;
1245
1246         vq = dev->virtqueue[vring_idx];
1247         if (unlikely(!vq))
1248                 return -1;
1249
1250         inflight_info = vq->inflight_packed;
1251         if (unlikely(!inflight_info))
1252                 return -1;
1253
1254         if (unlikely(head >= vq->size))
1255                 return -1;
1256
1257         last = inflight_info->desc[head].last;
1258         if (unlikely(last >= vq->size))
1259                 return -1;
1260
1261         inflight_info->desc[last].next = inflight_info->free_head;
1262         inflight_info->free_head = head;
1263         inflight_info->used_idx += inflight_info->desc[head].num;
1264         if (inflight_info->used_idx >= inflight_info->desc_num) {
1265                 inflight_info->used_idx -= inflight_info->desc_num;
1266                 inflight_info->used_wrap_counter =
1267                         !inflight_info->used_wrap_counter;
1268         }
1269
1270         return 0;
1271 }
1272
1273 int
1274 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1275 {
1276         struct virtio_net *dev;
1277         struct vhost_virtqueue *vq;
1278
1279         dev = get_device(vid);
1280         if (!dev)
1281                 return -1;
1282
1283         if (vring_idx >= VHOST_MAX_VRING)
1284                 return -1;
1285
1286         vq = dev->virtqueue[vring_idx];
1287         if (!vq)
1288                 return -1;
1289
1290         if (vq_is_packed(dev))
1291                 vhost_vring_call_packed(dev, vq);
1292         else
1293                 vhost_vring_call_split(dev, vq);
1294
1295         return 0;
1296 }
1297
1298 uint16_t
1299 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1300 {
1301         struct virtio_net *dev;
1302         struct vhost_virtqueue *vq;
1303         uint16_t ret = 0;
1304
1305         dev = get_device(vid);
1306         if (!dev)
1307                 return 0;
1308
1309         if (queue_id >= VHOST_MAX_VRING)
1310                 return 0;
1311
1312         vq = dev->virtqueue[queue_id];
1313         if (!vq)
1314                 return 0;
1315
1316         rte_spinlock_lock(&vq->access_lock);
1317
1318         if (unlikely(!vq->enabled || vq->avail == NULL))
1319                 goto out;
1320
1321         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1322
1323 out:
1324         rte_spinlock_unlock(&vq->access_lock);
1325         return ret;
1326 }
1327
1328 static inline int
1329 vhost_enable_notify_split(struct virtio_net *dev,
1330                 struct vhost_virtqueue *vq, int enable)
1331 {
1332         if (vq->used == NULL)
1333                 return -1;
1334
1335         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1336                 if (enable)
1337                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1338                 else
1339                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1340         } else {
1341                 if (enable)
1342                         vhost_avail_event(vq) = vq->last_avail_idx;
1343         }
1344         return 0;
1345 }
1346
1347 static inline int
1348 vhost_enable_notify_packed(struct virtio_net *dev,
1349                 struct vhost_virtqueue *vq, int enable)
1350 {
1351         uint16_t flags;
1352
1353         if (vq->device_event == NULL)
1354                 return -1;
1355
1356         if (!enable) {
1357                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1358                 return 0;
1359         }
1360
1361         flags = VRING_EVENT_F_ENABLE;
1362         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1363                 flags = VRING_EVENT_F_DESC;
1364                 vq->device_event->off_wrap = vq->last_avail_idx |
1365                         vq->avail_wrap_counter << 15;
1366         }
1367
1368         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1369
1370         vq->device_event->flags = flags;
1371         return 0;
1372 }
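/*
 * Worked example, added for illustration only: with VIRTIO_RING_F_EVENT_IDX
 * negotiated, last_avail_idx = 10 and avail_wrap_counter = 1, the descriptor
 * event written above is:
 *
 *	off_wrap = 10 | (1 << 15);	// 0x800a: descriptor offset 10, wrap bit set
 *	flags    = VRING_EVENT_F_DESC;	// kick once that descriptor is made available
 *
 * Without EVENT_IDX, flags is simply VRING_EVENT_F_ENABLE, or
 * VRING_EVENT_F_DISABLE when notifications are being turned off.
 */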
1373
1374 int
1375 vhost_enable_guest_notification(struct virtio_net *dev,
1376                 struct vhost_virtqueue *vq, int enable)
1377 {
1378         /*
1379          * If the virtqueue is not ready yet, the setting will be
1380          * applied when it becomes ready.
1381          */
1382         if (!vq->ready)
1383                 return 0;
1384
1385         if (vq_is_packed(dev))
1386                 return vhost_enable_notify_packed(dev, vq, enable);
1387         else
1388                 return vhost_enable_notify_split(dev, vq, enable);
1389 }
1390
1391 int
1392 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1393 {
1394         struct virtio_net *dev = get_device(vid);
1395         struct vhost_virtqueue *vq;
1396         int ret;
1397
1398         if (!dev)
1399                 return -1;
1400
1401         if (queue_id >= VHOST_MAX_VRING)
1402                 return -1;
1403
1404         vq = dev->virtqueue[queue_id];
1405         if (!vq)
1406                 return -1;
1407
1408         rte_spinlock_lock(&vq->access_lock);
1409
1410         vq->notif_enable = enable;
1411         ret = vhost_enable_guest_notification(dev, vq, enable);
1412
1413         rte_spinlock_unlock(&vq->access_lock);
1414
1415         return ret;
1416 }
1417
1418 void
1419 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1420 {
1421         struct virtio_net *dev = get_device(vid);
1422
1423         if (dev == NULL)
1424                 return;
1425
1426         vhost_log_write(dev, addr, len);
1427 }
1428
1429 void
1430 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1431                          uint64_t offset, uint64_t len)
1432 {
1433         struct virtio_net *dev;
1434         struct vhost_virtqueue *vq;
1435
1436         dev = get_device(vid);
1437         if (dev == NULL)
1438                 return;
1439
1440         if (vring_idx >= VHOST_MAX_VRING)
1441                 return;
1442         vq = dev->virtqueue[vring_idx];
1443         if (!vq)
1444                 return;
1445
1446         vhost_log_used_vring(dev, vq, offset, len);
1447 }
1448
1449 uint32_t
1450 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1451 {
1452         struct virtio_net *dev;
1453         struct vhost_virtqueue *vq;
1454         uint32_t ret = 0;
1455
1456         dev = get_device(vid);
1457         if (dev == NULL)
1458                 return 0;
1459
1460         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1461                 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1462                         dev->vid, __func__, qid);
1463                 return 0;
1464         }
1465
1466         vq = dev->virtqueue[qid];
1467         if (vq == NULL)
1468                 return 0;
1469
1470         rte_spinlock_lock(&vq->access_lock);
1471
1472         if (unlikely(!vq->enabled || vq->avail == NULL))
1473                 goto out;
1474
1475         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1476
1477 out:
1478         rte_spinlock_unlock(&vq->access_lock);
1479         return ret;
1480 }
1481
1482 struct rte_vdpa_device *
1483 rte_vhost_get_vdpa_device(int vid)
1484 {
1485         struct virtio_net *dev = get_device(vid);
1486
1487         if (dev == NULL)
1488                 return NULL;
1489
1490         return dev->vdpa_dev;
1491 }
1492
1493 int rte_vhost_get_log_base(int vid, uint64_t *log_base,
1494                 uint64_t *log_size)
1495 {
1496         struct virtio_net *dev = get_device(vid);
1497
1498         if (dev == NULL || log_base == NULL || log_size == NULL)
1499                 return -1;
1500
1501         *log_base = dev->log_base;
1502         *log_size = dev->log_size;
1503
1504         return 0;
1505 }
1506
1507 int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1508                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1509 {
1510         struct vhost_virtqueue *vq;
1511         struct virtio_net *dev = get_device(vid);
1512
1513         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1514                 return -1;
1515
1516         if (queue_id >= VHOST_MAX_VRING)
1517                 return -1;
1518
1519         vq = dev->virtqueue[queue_id];
1520         if (!vq)
1521                 return -1;
1522
1523         if (vq_is_packed(dev)) {
1524                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1525                                   vq->last_avail_idx;
1526                 *last_used_idx = (vq->used_wrap_counter << 15) |
1527                                  vq->last_used_idx;
1528         } else {
1529                 *last_avail_idx = vq->last_avail_idx;
1530                 *last_used_idx = vq->last_used_idx;
1531         }
1532
1533         return 0;
1534 }
1535
1536 int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1537                 uint16_t last_avail_idx, uint16_t last_used_idx)
1538 {
1539         struct vhost_virtqueue *vq;
1540         struct virtio_net *dev = get_device(vid);
1541
1542         if (!dev)
1543                 return -1;
1544
1545         if (queue_id >= VHOST_MAX_VRING)
1546                 return -1;
1547
1548         vq = dev->virtqueue[queue_id];
1549         if (!vq)
1550                 return -1;
1551
1552         if (vq_is_packed(dev)) {
1553                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1554                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1555                 vq->last_used_idx = last_used_idx & 0x7fff;
1556                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1557         } else {
1558                 vq->last_avail_idx = last_avail_idx;
1559                 vq->last_used_idx = last_used_idx;
1560         }
1561
1562         return 0;
1563 }
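/*
 * Worked example, added for illustration only: for packed rings the wrap
 * counters travel in bit 15 of the indexes exchanged by the two functions
 * above. With last_avail_idx = 5, avail_wrap_counter = 1, last_used_idx = 5
 * and used_wrap_counter = 1:
 *
 *	uint16_t avail_base = (1 << 15) | 5;		// 0x8005
 *	uint16_t used_base  = (1 << 15) | 5;		// 0x8005
 *
 *	// decoding, as done in rte_vhost_set_vring_base():
 *	uint16_t idx  = avail_base & 0x7fff;		// 5
 *	bool     wrap = !!(avail_base & (1 << 15));	// true
 */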
1564
1565 int
1566 rte_vhost_get_vring_base_from_inflight(int vid,
1567                                        uint16_t queue_id,
1568                                        uint16_t *last_avail_idx,
1569                                        uint16_t *last_used_idx)
1570 {
1571         struct rte_vhost_inflight_info_packed *inflight_info;
1572         struct vhost_virtqueue *vq;
1573         struct virtio_net *dev = get_device(vid);
1574
1575         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1576                 return -1;
1577
1578         if (queue_id >= VHOST_MAX_VRING)
1579                 return -1;
1580
1581         vq = dev->virtqueue[queue_id];
1582         if (!vq)
1583                 return -1;
1584
1585         if (!vq_is_packed(dev))
1586                 return -1;
1587
1588         inflight_info = vq->inflight_packed;
1589         if (!inflight_info)
1590                 return -1;
1591
1592         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1593                           inflight_info->old_used_idx;
1594         *last_used_idx = *last_avail_idx;
1595
1596         return 0;
1597 }
1598
1599 int rte_vhost_extern_callback_register(int vid,
1600                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1601 {
1602         struct virtio_net *dev = get_device(vid);
1603
1604         if (dev == NULL || ops == NULL)
1605                 return -1;
1606
1607         dev->extern_ops = *ops;
1608         dev->extern_data = ctx;
1609         return 0;
1610 }
1611
1612 int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1613                                         uint32_t features,
1614                                         struct rte_vhost_async_channel_ops *ops)
1615 {
1616         struct vhost_virtqueue *vq;
1617         struct virtio_net *dev = get_device(vid);
1618         struct rte_vhost_async_features f;
1619         int node;
1620
1621         if (dev == NULL || ops == NULL)
1622                 return -1;
1623
1624         f.intval = features;
1625
1626         if (queue_id >= VHOST_MAX_VRING)
1627                 return -1;
1628
1629         vq = dev->virtqueue[queue_id];
1630
1631         if (unlikely(vq == NULL || !dev->async_copy))
1632                 return -1;
1633
1634         if (unlikely(!f.async_inorder)) {
1635                 VHOST_LOG_CONFIG(ERR,
1636                         "async copy is not supported in non-inorder mode "
1637                         "(vid %d, qid: %d)\n", vid, queue_id);
1638                 return -1;
1639         }
1640
1641         if (unlikely(ops->check_completed_copies == NULL ||
1642                 ops->transfer_data == NULL))
1643                 return -1;
1644
1645         rte_spinlock_lock(&vq->access_lock);
1646
1647         if (unlikely(vq->async_registered)) {
1648                 VHOST_LOG_CONFIG(ERR,
1649                         "async register failed: channel already registered "
1650                         "(vid %d, qid: %d)\n", vid, queue_id);
1651                 goto reg_out;
1652         }
1653
1654 #ifdef RTE_LIBRTE_VHOST_NUMA
1655         if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
1656                 VHOST_LOG_CONFIG(ERR,
1657                         "Unable to get NUMA information in async register; "
1658                         "allocating async buffer memory on the caller thread's node\n");
1659                 node = SOCKET_ID_ANY;
1660         }
1661 #else
1662         node = SOCKET_ID_ANY;
1663 #endif
1664
1665         vq->async_pkts_info = rte_malloc_socket(NULL,
1666                         vq->size * sizeof(struct async_inflight_info),
1667                         RTE_CACHE_LINE_SIZE, node);
1668         if (!vq->async_pkts_info) {
1669                 vhost_free_async_mem(vq);
1670                 VHOST_LOG_CONFIG(ERR,
1671                         "async register failed: cannot allocate memory for async_pkts_info "
1672                         "(vid %d, qid: %d)\n", vid, queue_id);
1673                 goto reg_out;
1674         }
1675
1676         vq->it_pool = rte_malloc_socket(NULL,
1677                         VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
1678                         RTE_CACHE_LINE_SIZE, node);
1679         if (!vq->it_pool) {
1680                 vhost_free_async_mem(vq);
1681                 VHOST_LOG_CONFIG(ERR,
1682                         "async register failed: cannot allocate memory for it_pool "
1683                         "(vid %d, qid: %d)\n", vid, queue_id);
1684                 goto reg_out;
1685         }
1686
1687         vq->vec_pool = rte_malloc_socket(NULL,
1688                         VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
1689                         RTE_CACHE_LINE_SIZE, node);
1690         if (!vq->vec_pool) {
1691                 vhost_free_async_mem(vq);
1692                 VHOST_LOG_CONFIG(ERR,
1693                         "async register failed: cannot allocate memory for vec_pool "
1694                         "(vid %d, qid: %d)\n", vid, queue_id);
1695                 goto reg_out;
1696         }
1697
1698         if (vq_is_packed(dev)) {
1699                 vq->async_buffers_packed = rte_malloc_socket(NULL,
1700                         vq->size * sizeof(struct vring_used_elem_packed),
1701                         RTE_CACHE_LINE_SIZE, node);
1702                 if (!vq->async_buffers_packed) {
1703                         vhost_free_async_mem(vq);
1704                         VHOST_LOG_CONFIG(ERR,
1705                                 "async register failed: cannot allocate memory for async buffers "
1706                                 "(vid %d, qid: %d)\n", vid, queue_id);
1707                         goto reg_out;
1708                 }
1709         } else {
1710                 vq->async_descs_split = rte_malloc_socket(NULL,
1711                         vq->size * sizeof(struct vring_used_elem),
1712                         RTE_CACHE_LINE_SIZE, node);
1713                 if (!vq->async_descs_split) {
1714                         vhost_free_async_mem(vq);
1715                         VHOST_LOG_CONFIG(ERR,
1716                                 "async register failed: cannot allocate memory for async descs "
1717                                 "(vid %d, qid: %d)\n", vid, queue_id);
1718                         goto reg_out;
1719                 }
1720         }
1721
1722         vq->async_ops.check_completed_copies = ops->check_completed_copies;
1723         vq->async_ops.transfer_data = ops->transfer_data;
1724
1725         vq->async_inorder = f.async_inorder;
1726         vq->async_threshold = f.async_threshold;
1727
1728         vq->async_registered = true;
1729
1730 reg_out:
1731         rte_spinlock_unlock(&vq->access_lock);
1732
1733         return 0;
1734 }
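/*
 * Illustrative usage sketch, not part of the original file: a DMA-capable
 * application registers its copy engine on a virtqueue roughly as below.
 * example_transfer_data() and example_check_completed_copies() stand for
 * application callbacks implementing the two operations required above;
 * their prototypes are defined by the vhost async API and are not repeated
 * here. The threshold semantics are assumed, not taken from this file.
 *
 *	struct rte_vhost_async_channel_ops example_ops = {
 *		.transfer_data = example_transfer_data,
 *		.check_completed_copies = example_check_completed_copies,
 *	};
 *	struct rte_vhost_async_features example_f;
 *
 *	example_f.intval = 0;
 *	example_f.async_inorder = 1;	// mandatory, see the check above
 *	example_f.async_threshold = 256;	// assumed: shorter copies stay on the CPU
 *
 *	if (rte_vhost_async_channel_register(vid, queue_id,
 *			example_f.intval, &example_ops) < 0)
 *		// handle registration failure
 */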
1735
1736 int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1737 {
1738         struct vhost_virtqueue *vq;
1739         struct virtio_net *dev = get_device(vid);
1740         int ret = -1;
1741
1742         if (dev == NULL)
1743                 return ret;
1744
1745         if (queue_id >= VHOST_MAX_VRING)
1746                 return ret;
1747
1748         vq = dev->virtqueue[queue_id];
1749
1750         if (vq == NULL)
1751                 return ret;
1752
1753         ret = 0;
1754
1755         if (!vq->async_registered)
1756                 return ret;
1757
1758         if (!rte_spinlock_trylock(&vq->access_lock)) {
1759                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1760                         "virtqueue is busy.\n");
1761                 return -1;
1762         }
1763
1764         if (vq->async_pkts_inflight_n) {
1765                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1766                         "async inflight packets must be completed before unregistration.\n");
1767                 ret = -1;
1768                 goto out;
1769         }
1770
1771         vhost_free_async_mem(vq);
1772
1773         vq->async_ops.transfer_data = NULL;
1774         vq->async_ops.check_completed_copies = NULL;
1775         vq->async_registered = false;
1776
1777 out:
1778         rte_spinlock_unlock(&vq->access_lock);
1779
1780         return ret;
1781 }
1782
1783 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1784 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);