1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_CONFIG(ERR,
62                                 "IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63                                 iova);
64                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65                 }
66
67                 vhost_user_iotlb_rd_lock(vq);
68         }
69
70         return 0;
71 }
72
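/*
 * Typical caller pattern (illustrative sketch, not a verbatim copy of the
 * datapath): the IOTLB read lock is held for a whole burst, and a 0 return
 * means the translation is missing and an IOTLB miss request was sent, so
 * the descriptor cannot be processed yet:
 *
 *	vhost_user_iotlb_rd_lock(vq);
 *	vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);
 *	if (vva == 0)
 *		goto out_unlock;
 *	...
 *	vhost_user_iotlb_rd_unlock(vq);
 */
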
73 #define VHOST_LOG_PAGE  4096
74
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82         /*
83          * __sync_ built-ins are deprecated, but __atomic_ ones
84          * are not as well optimized in older GCC versions.
85          */
86         __sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95         vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
97
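/*
 * Worked example for the dirty log layout (values are illustrative only):
 * with VHOST_LOG_PAGE = 4096, a write to guest physical address 0x2345 of
 * length 0x2000 touches pages 2, 3 and 4:
 *
 *	first page = 0x2345 / 4096                = 2
 *	last page  = (0x2345 + 0x2000 - 1) / 4096 = 4
 *
 * Page 2 is marked by setting bit (2 % 8) = 2 of log_base[2 / 8], i.e.
 * vhost_set_bit(2, &log_base[0]).
 */
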
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101         uint64_t page;
102
103         if (unlikely(!dev->log_base || !len))
104                 return;
105
106         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107                 return;
108
109         /* To make sure guest memory updates are committed before logging */
110         rte_atomic_thread_fence(__ATOMIC_RELEASE);
111
112         page = addr / VHOST_LOG_PAGE;
113         while (page * VHOST_LOG_PAGE < addr + len) {
114                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115                 page += 1;
116         }
117 }
118
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121                              uint64_t iova, uint64_t len)
122 {
123         uint64_t hva, gpa, map_len;
124         map_len = len;
125
126         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127         if (map_len != len) {
128                 VHOST_LOG_DATA(ERR,
129                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130                         iova);
131                 return;
132         }
133
134         gpa = hva_to_gpa(dev, hva, len);
135         if (gpa)
136                 __vhost_log_write(dev, gpa, len);
137 }
138
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142         unsigned long *log_base;
143         int i;
144
145         if (unlikely(!dev->log_base))
146                 return;
147
148         /* No cache, nothing to sync */
149         if (unlikely(!vq->log_cache))
150                 return;
151
152         rte_atomic_thread_fence(__ATOMIC_RELEASE);
153
154         log_base = (unsigned long *)(uintptr_t)dev->log_base;
155
156         for (i = 0; i < vq->log_cache_nb_elem; i++) {
157                 struct log_cache_entry *elem = vq->log_cache + i;
158
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160                 /*
161                  * '__sync' builtins are deprecated, but '__atomic' ones
162                  * are not as well optimized in older GCC versions.
163                  */
164                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166                 __atomic_fetch_or(log_base + elem->offset, elem->val,
167                                 __ATOMIC_RELAXED);
168 #endif
169         }
170
171         rte_atomic_thread_fence(__ATOMIC_RELEASE);
172
173         vq->log_cache_nb_elem = 0;
174 }
175
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178                         uint64_t page)
179 {
180         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181         uint32_t offset = page / (sizeof(unsigned long) << 3);
182         int i;
183
184         if (unlikely(!vq->log_cache)) {
185                 /* No logging cache allocated, write dirty log map directly */
186                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
187                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188
189                 return;
190         }
191
192         for (i = 0; i < vq->log_cache_nb_elem; i++) {
193                 struct log_cache_entry *elem = vq->log_cache + i;
194
195                 if (elem->offset == offset) {
196                         elem->val |= (1UL << bit_nr);
197                         return;
198                 }
199         }
200
201         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202                 /*
203                  * No more room for a new log cache entry,
204                  * so write the dirty log map directly.
205                  */
206                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
207                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208
209                 return;
210         }
211
212         vq->log_cache[i].offset = offset;
213         vq->log_cache[i].val = (1UL << bit_nr);
214         vq->log_cache_nb_elem++;
215 }
216
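/*
 * Cache layout example (illustrative, assuming a 64-bit unsigned long):
 * page 70 maps to offset 70 / 64 = 1 and bit 70 % 64 = 6, so the matching
 * cache entry accumulates (1UL << 6) in 'val'. __vhost_log_cache_sync()
 * later ORs that value into the unsigned long at log_base[1] in a single
 * atomic operation, instead of one atomic per dirtied page.
 */
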
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219                         uint64_t addr, uint64_t len)
220 {
221         uint64_t page;
222
223         if (unlikely(!dev->log_base || !len))
224                 return;
225
226         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227                 return;
228
229         page = addr / VHOST_LOG_PAGE;
230         while (page * VHOST_LOG_PAGE < addr + len) {
231                 vhost_log_cache_page(dev, vq, page);
232                 page += 1;
233         }
234 }
235
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238                              uint64_t iova, uint64_t len)
239 {
240         uint64_t hva, gpa, map_len;
241         map_len = len;
242
243         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244         if (map_len != len) {
245                 VHOST_LOG_DATA(ERR,
246                         "Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247                         iova);
248                 return;
249         }
250
251         gpa = hva_to_gpa(dev, hva, len);
252         if (gpa)
253                 __vhost_log_cache_write(dev, vq, gpa, len);
254 }
255
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258                 uint64_t desc_addr, uint64_t desc_len)
259 {
260         void *idesc;
261         uint64_t src, dst;
262         uint64_t len, remain = desc_len;
263
264         idesc = rte_malloc(__func__, desc_len, 0);
265         if (unlikely(!idesc))
266                 return NULL;
267
268         dst = (uint64_t)(uintptr_t)idesc;
269
270         while (remain) {
271                 len = remain;
272                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273                                 VHOST_ACCESS_RO);
274                 if (unlikely(!src || !len)) {
275                         rte_free(idesc);
276                         return NULL;
277                 }
278
279                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280
281                 remain -= len;
282                 dst += len;
283                 desc_addr += len;
284         }
285
286         return idesc;
287 }
288
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292         if ((vq->callfd >= 0) && (destroy != 0))
293                 close(vq->callfd);
294         if (vq->kickfd >= 0)
295                 close(vq->kickfd);
296 }
297
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301         if (!(dev->protocol_features &
302             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303                 return;
304
305         if (vq_is_packed(dev)) {
306                 if (vq->inflight_packed)
307                         vq->inflight_packed = NULL;
308         } else {
309                 if (vq->inflight_split)
310                         vq->inflight_split = NULL;
311         }
312
313         if (vq->resubmit_inflight) {
314                 if (vq->resubmit_inflight->resubmit_list) {
315                         free(vq->resubmit_inflight->resubmit_list);
316                         vq->resubmit_inflight->resubmit_list = NULL;
317                 }
318                 free(vq->resubmit_inflight);
319                 vq->resubmit_inflight = NULL;
320         }
321 }
322
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330         uint32_t i;
331
332         vhost_backend_cleanup(dev);
333
334         for (i = 0; i < dev->nr_vring; i++) {
335                 cleanup_vq(dev->virtqueue[i], destroy);
336                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
337         }
338 }
339
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343         rte_free(vq->async_pkts_info);
344
345         rte_free(vq->async_buffers_packed);
346         vq->async_buffers_packed = NULL;
347         rte_free(vq->async_descs_split);
348         vq->async_descs_split = NULL;
349
350         rte_free(vq->it_pool);
351         rte_free(vq->vec_pool);
352
353         vq->async_pkts_info = NULL;
354         vq->it_pool = NULL;
355         vq->vec_pool = NULL;
356 }
357
358 void
359 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
360 {
361         if (vq_is_packed(dev))
362                 rte_free(vq->shadow_used_packed);
363         else
364                 rte_free(vq->shadow_used_split);
365
366         vhost_free_async_mem(vq);
367         rte_free(vq->batch_copy_elems);
368         if (vq->iotlb_pool)
369                 rte_mempool_free(vq->iotlb_pool);
370         rte_free(vq->log_cache);
371         rte_free(vq);
372 }
373
374 /*
375  * Release virtqueues and device memory.
376  */
377 static void
378 free_device(struct virtio_net *dev)
379 {
380         uint32_t i;
381
382         for (i = 0; i < dev->nr_vring; i++)
383                 free_vq(dev, dev->virtqueue[i]);
384
385         rte_free(dev);
386 }
387
388 static __rte_always_inline int
389 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
390 {
391         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
392                 return 0;
393
394         vq->log_guest_addr = translate_log_addr(dev, vq,
395                                                 vq->ring_addrs.log_guest_addr);
396         if (vq->log_guest_addr == 0)
397                 return -1;
398
399         return 0;
400 }
401
402 /*
403  * Converts a vring log address to a GPA.
404  * If IOMMU is enabled, the log address is an IOVA.
405  * If IOMMU is not enabled, the log address is already a GPA.
406  *
407  * Caller should have iotlb_lock read-locked
408  */
409 uint64_t
410 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
411                 uint64_t log_addr)
412 {
413         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
414                 const uint64_t exp_size = sizeof(uint64_t);
415                 uint64_t hva, gpa;
416                 uint64_t size = exp_size;
417
418                 hva = vhost_iova_to_vva(dev, vq, log_addr,
419                                         &size, VHOST_ACCESS_RW);
420
421                 if (size != exp_size)
422                         return 0;
423
424                 gpa = hva_to_gpa(dev, hva, exp_size);
425                 if (!gpa) {
426                         VHOST_LOG_CONFIG(ERR,
427                                 "VQ: Failed to find GPA for log_addr: 0x%"
428                                 PRIx64 " hva: 0x%" PRIx64 "\n",
429                                 log_addr, hva);
430                         return 0;
431                 }
432                 return gpa;
433
434         } else
435                 return log_addr;
436 }
437
438 /* Caller should have iotlb_lock read-locked */
439 static int
440 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
441 {
442         uint64_t req_size, size;
443
444         req_size = sizeof(struct vring_desc) * vq->size;
445         size = req_size;
446         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
447                                                 vq->ring_addrs.desc_user_addr,
448                                                 &size, VHOST_ACCESS_RW);
449         if (!vq->desc || size != req_size)
450                 return -1;
451
452         req_size = sizeof(struct vring_avail);
453         req_size += sizeof(uint16_t) * vq->size;
454         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
455                 req_size += sizeof(uint16_t);
456         size = req_size;
457         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
458                                                 vq->ring_addrs.avail_user_addr,
459                                                 &size, VHOST_ACCESS_RW);
460         if (!vq->avail || size != req_size)
461                 return -1;
462
463         req_size = sizeof(struct vring_used);
464         req_size += sizeof(struct vring_used_elem) * vq->size;
465         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
466                 req_size += sizeof(uint16_t);
467         size = req_size;
468         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
469                                                 vq->ring_addrs.used_user_addr,
470                                                 &size, VHOST_ACCESS_RW);
471         if (!vq->used || size != req_size)
472                 return -1;
473
474         return 0;
475 }
476
477 /* Caller should have iotlb_lock read-locked */
478 static int
479 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
480 {
481         uint64_t req_size, size;
482
483         req_size = sizeof(struct vring_packed_desc) * vq->size;
484         size = req_size;
485         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
486                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
487                                 &size, VHOST_ACCESS_RW);
488         if (!vq->desc_packed || size != req_size)
489                 return -1;
490
491         req_size = sizeof(struct vring_packed_desc_event);
492         size = req_size;
493         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
494                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
495                                 &size, VHOST_ACCESS_RW);
496         if (!vq->driver_event || size != req_size)
497                 return -1;
498
499         req_size = sizeof(struct vring_packed_desc_event);
500         size = req_size;
501         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
502                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
503                                 &size, VHOST_ACCESS_RW);
504         if (!vq->device_event || size != req_size)
505                 return -1;
506
507         return 0;
508 }
509
510 int
511 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
512 {
513
514         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
515                 return -1;
516
517         if (vq_is_packed(dev)) {
518                 if (vring_translate_packed(dev, vq) < 0)
519                         return -1;
520         } else {
521                 if (vring_translate_split(dev, vq) < 0)
522                         return -1;
523         }
524
525         if (log_translate(dev, vq) < 0)
526                 return -1;
527
528         vq->access_ok = true;
529
530         return 0;
531 }
532
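/*
 * Size check example (illustrative): for a split ring with vq->size = 256,
 * vring_translate_split() requires the descriptor table to be mapped as one
 * contiguous IOTLB region of sizeof(struct vring_desc) * 256 = 4096 bytes;
 * the avail and used rings likewise include one extra uint16_t when
 * VIRTIO_RING_F_EVENT_IDX was negotiated. Any shorter translation makes
 * vring_translate() return -1 without marking the ring access_ok.
 */
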
533 void
534 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
535 {
536         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
537                 vhost_user_iotlb_wr_lock(vq);
538
539         vq->access_ok = false;
540         vq->desc = NULL;
541         vq->avail = NULL;
542         vq->used = NULL;
543         vq->log_guest_addr = 0;
544
545         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
546                 vhost_user_iotlb_wr_unlock(vq);
547 }
548
549 static void
550 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
551 {
552         struct vhost_virtqueue *vq;
553
554         if (vring_idx >= VHOST_MAX_VRING) {
555                 VHOST_LOG_CONFIG(ERR,
556                                 "Failed to init vring, out of bounds (%d)\n",
557                                 vring_idx);
558                 return;
559         }
560
561         vq = dev->virtqueue[vring_idx];
562         if (!vq) {
563                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
564                                 vring_idx);
565                 return;
566         }
567
568         memset(vq, 0, sizeof(struct vhost_virtqueue));
569
570         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
571         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
572         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
573 }
574
575 static void
576 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
577 {
578         struct vhost_virtqueue *vq;
579         int callfd;
580
581         if (vring_idx >= VHOST_MAX_VRING) {
582                 VHOST_LOG_CONFIG(ERR,
583                                 "Failed to init vring, out of bounds (%d)\n",
584                                 vring_idx);
585                 return;
586         }
587
588         vq = dev->virtqueue[vring_idx];
589         if (!vq) {
590                 VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
591                                 vring_idx);
592                 return;
593         }
594
595         callfd = vq->callfd;
596         init_vring_queue(dev, vring_idx);
597         vq->callfd = callfd;
598 }
599
600 int
601 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
602 {
603         struct vhost_virtqueue *vq;
604         uint32_t i;
605
606         /* Also allocate holes, if any, up to requested vring index. */
607         for (i = 0; i <= vring_idx; i++) {
608                 if (dev->virtqueue[i])
609                         continue;
610
611                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
612                 if (vq == NULL) {
613                         VHOST_LOG_CONFIG(ERR,
614                                 "Failed to allocate memory for vring:%u.\n", i);
615                         return -1;
616                 }
617
618                 dev->virtqueue[i] = vq;
619                 init_vring_queue(dev, i);
620                 rte_spinlock_init(&vq->access_lock);
621                 vq->avail_wrap_counter = 1;
622                 vq->used_wrap_counter = 1;
623                 vq->signalled_used_valid = false;
624         }
625
626         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
627
628         return 0;
629 }
630
631 /*
632  * Reset some variables in the device structure, while keeping a few
633  * others untouched, such as vid, ifname and nr_vring: they
634  * should remain the same unless the device is removed.
635  */
636 void
637 reset_device(struct virtio_net *dev)
638 {
639         uint32_t i;
640
641         dev->features = 0;
642         dev->protocol_features = 0;
643         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
644
645         for (i = 0; i < dev->nr_vring; i++)
646                 reset_vring_queue(dev, i);
647 }
648
649 /*
650  * Invoked when a new vhost-user connection is established (i.e. when
651  * a new virtio device is being attached).
652  */
653 int
654 vhost_new_device(void)
655 {
656         struct virtio_net *dev;
657         int i;
658
659         pthread_mutex_lock(&vhost_dev_lock);
660         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
661                 if (vhost_devices[i] == NULL)
662                         break;
663         }
664
665         if (i == MAX_VHOST_DEVICE) {
666                 VHOST_LOG_CONFIG(ERR,
667                         "Failed to find a free slot for new device.\n");
668                 pthread_mutex_unlock(&vhost_dev_lock);
669                 return -1;
670         }
671
672         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
673         if (dev == NULL) {
674                 VHOST_LOG_CONFIG(ERR,
675                         "Failed to allocate memory for new dev.\n");
676                 pthread_mutex_unlock(&vhost_dev_lock);
677                 return -1;
678         }
679
680         vhost_devices[i] = dev;
681         pthread_mutex_unlock(&vhost_dev_lock);
682
683         dev->vid = i;
684         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
685         dev->slave_req_fd = -1;
686         dev->postcopy_ufd = -1;
687         rte_spinlock_init(&dev->slave_req_lock);
688
689         return i;
690 }
691
692 void
693 vhost_destroy_device_notify(struct virtio_net *dev)
694 {
695         struct rte_vdpa_device *vdpa_dev;
696
697         if (dev->flags & VIRTIO_DEV_RUNNING) {
698                 vdpa_dev = dev->vdpa_dev;
699                 if (vdpa_dev)
700                         vdpa_dev->ops->dev_close(dev->vid);
701                 dev->flags &= ~VIRTIO_DEV_RUNNING;
702                 dev->notify_ops->destroy_device(dev->vid);
703         }
704 }
705
706 /*
707  * Invoked when the vhost-user connection is broken (when
708  * the virtio device is being detached).
709  */
710 void
711 vhost_destroy_device(int vid)
712 {
713         struct virtio_net *dev = get_device(vid);
714
715         if (dev == NULL)
716                 return;
717
718         vhost_destroy_device_notify(dev);
719
720         cleanup_device(dev, 1);
721         free_device(dev);
722
723         vhost_devices[vid] = NULL;
724 }
725
726 void
727 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
728 {
729         struct virtio_net *dev = get_device(vid);
730
731         if (dev == NULL)
732                 return;
733
734         dev->vdpa_dev = vdpa_dev;
735 }
736
737 void
738 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
739 {
740         struct virtio_net *dev;
741         unsigned int len;
742
743         dev = get_device(vid);
744         if (dev == NULL)
745                 return;
746
747         len = if_len > sizeof(dev->ifname) ?
748                 sizeof(dev->ifname) : if_len;
749
750         strncpy(dev->ifname, if_name, len);
751         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
752 }
753
754 void
755 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
756 {
757         struct virtio_net *dev = get_device(vid);
758
759         if (dev == NULL)
760                 return;
761
762         if (enable)
763                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
764         else
765                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
766         if (!compliant_ol_flags)
767                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
768         else
769                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
770 }
771
772 void
773 vhost_enable_extbuf(int vid)
774 {
775         struct virtio_net *dev = get_device(vid);
776
777         if (dev == NULL)
778                 return;
779
780         dev->extbuf = 1;
781 }
782
783 void
784 vhost_enable_linearbuf(int vid)
785 {
786         struct virtio_net *dev = get_device(vid);
787
788         if (dev == NULL)
789                 return;
790
791         dev->linearbuf = 1;
792 }
793
794 int
795 rte_vhost_get_mtu(int vid, uint16_t *mtu)
796 {
797         struct virtio_net *dev = get_device(vid);
798
799         if (dev == NULL || mtu == NULL)
800                 return -ENODEV;
801
802         if (!(dev->flags & VIRTIO_DEV_READY))
803                 return -EAGAIN;
804
805         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
806                 return -ENOTSUP;
807
808         *mtu = dev->mtu;
809
810         return 0;
811 }
812
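/*
 * Usage sketch (hypothetical caller, e.g. from a new_device() callback;
 * configure_mtu(), schedule_retry() and use_default_mtu() are made-up
 * helpers):
 *
 *	uint16_t mtu;
 *	int ret = rte_vhost_get_mtu(vid, &mtu);
 *
 *	if (ret == 0)
 *		configure_mtu(mtu);
 *	else if (ret == -EAGAIN)
 *		schedule_retry();      (negotiation not finished yet)
 *	else if (ret == -ENOTSUP)
 *		use_default_mtu();     (VIRTIO_NET_F_MTU not negotiated)
 */
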
813 int
814 rte_vhost_get_numa_node(int vid)
815 {
816 #ifdef RTE_LIBRTE_VHOST_NUMA
817         struct virtio_net *dev = get_device(vid);
818         int numa_node;
819         int ret;
820
821         if (dev == NULL || numa_available() != 0)
822                 return -1;
823
824         ret = get_mempolicy(&numa_node, NULL, 0, dev,
825                             MPOL_F_NODE | MPOL_F_ADDR);
826         if (ret < 0) {
827                 VHOST_LOG_CONFIG(ERR,
828                         "(%d) failed to query numa node: %s\n",
829                         vid, rte_strerror(errno));
830                 return -1;
831         }
832
833         return numa_node;
834 #else
835         RTE_SET_USED(vid);
836         return -1;
837 #endif
838 }
839
840 uint32_t
841 rte_vhost_get_queue_num(int vid)
842 {
843         struct virtio_net *dev = get_device(vid);
844
845         if (dev == NULL)
846                 return 0;
847
848         return dev->nr_vring / 2;
849 }
850
851 uint16_t
852 rte_vhost_get_vring_num(int vid)
853 {
854         struct virtio_net *dev = get_device(vid);
855
856         if (dev == NULL)
857                 return 0;
858
859         return dev->nr_vring;
860 }
861
862 int
863 rte_vhost_get_ifname(int vid, char *buf, size_t len)
864 {
865         struct virtio_net *dev = get_device(vid);
866
867         if (dev == NULL || buf == NULL || len == 0)
868                 return -1;
869
870         len = RTE_MIN(len, sizeof(dev->ifname));
871
872         strncpy(buf, dev->ifname, len);
873         buf[len - 1] = '\0';
874
875         return 0;
876 }
877
878 int
879 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
880 {
881         struct virtio_net *dev;
882
883         dev = get_device(vid);
884         if (dev == NULL || features == NULL)
885                 return -1;
886
887         *features = dev->features;
888         return 0;
889 }
890
891 int
892 rte_vhost_get_negotiated_protocol_features(int vid,
893                                            uint64_t *protocol_features)
894 {
895         struct virtio_net *dev;
896
897         dev = get_device(vid);
898         if (dev == NULL || protocol_features == NULL)
899                 return -1;
900
901         *protocol_features = dev->protocol_features;
902         return 0;
903 }
904
905 int
906 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
907 {
908         struct virtio_net *dev;
909         struct rte_vhost_memory *m;
910         size_t size;
911
912         dev = get_device(vid);
913         if (dev == NULL || mem == NULL)
914                 return -1;
915
916         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
917         m = malloc(sizeof(struct rte_vhost_memory) + size);
918         if (!m)
919                 return -1;
920
921         m->nregions = dev->mem->nregions;
922         memcpy(m->regions, dev->mem->regions, size);
923         *mem = m;
924
925         return 0;
926 }
927
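/*
 * Usage sketch (illustrative; inspect_region() is a made-up helper): the
 * table returned above is allocated with malloc(), so the caller owns it
 * and is expected to free() it when done:
 *
 *	struct rte_vhost_memory *mem = NULL;
 *	uint32_t i;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		for (i = 0; i < mem->nregions; i++)
 *			inspect_region(&mem->regions[i]);
 *		free(mem);
 *	}
 */
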
928 int
929 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
930                           struct rte_vhost_vring *vring)
931 {
932         struct virtio_net *dev;
933         struct vhost_virtqueue *vq;
934
935         dev = get_device(vid);
936         if (dev == NULL || vring == NULL)
937                 return -1;
938
939         if (vring_idx >= VHOST_MAX_VRING)
940                 return -1;
941
942         vq = dev->virtqueue[vring_idx];
943         if (!vq)
944                 return -1;
945
946         if (vq_is_packed(dev)) {
947                 vring->desc_packed = vq->desc_packed;
948                 vring->driver_event = vq->driver_event;
949                 vring->device_event = vq->device_event;
950         } else {
951                 vring->desc = vq->desc;
952                 vring->avail = vq->avail;
953                 vring->used = vq->used;
954         }
955         vring->log_guest_addr  = vq->log_guest_addr;
956
957         vring->callfd  = vq->callfd;
958         vring->kickfd  = vq->kickfd;
959         vring->size    = vq->size;
960
961         return 0;
962 }
963
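/*
 * Usage sketch (illustrative; use_packed_ring() and use_split_ring() are
 * made-up helpers): which members of struct rte_vhost_vring are meaningful
 * depends on whether VIRTIO_F_RING_PACKED was negotiated:
 *
 *	struct rte_vhost_vring vring;
 *	uint64_t features = 0;
 *
 *	if (rte_vhost_get_vhost_vring(vid, 0, &vring) == 0 &&
 *	    rte_vhost_get_negotiated_features(vid, &features) == 0) {
 *		if (features & (1ULL << VIRTIO_F_RING_PACKED))
 *			use_packed_ring(vring.desc_packed, vring.size);
 *		else
 *			use_split_ring(vring.desc, vring.size);
 *	}
 */
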
964 int
965 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
966                                   struct rte_vhost_ring_inflight *vring)
967 {
968         struct virtio_net *dev;
969         struct vhost_virtqueue *vq;
970
971         dev = get_device(vid);
972         if (unlikely(!dev))
973                 return -1;
974
975         if (vring_idx >= VHOST_MAX_VRING)
976                 return -1;
977
978         vq = dev->virtqueue[vring_idx];
979         if (unlikely(!vq))
980                 return -1;
981
982         if (vq_is_packed(dev)) {
983                 if (unlikely(!vq->inflight_packed))
984                         return -1;
985
986                 vring->inflight_packed = vq->inflight_packed;
987         } else {
988                 if (unlikely(!vq->inflight_split))
989                         return -1;
990
991                 vring->inflight_split = vq->inflight_split;
992         }
993
994         vring->resubmit_inflight = vq->resubmit_inflight;
995
996         return 0;
997 }
998
999 int
1000 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1001                                   uint16_t idx)
1002 {
1003         struct vhost_virtqueue *vq;
1004         struct virtio_net *dev;
1005
1006         dev = get_device(vid);
1007         if (unlikely(!dev))
1008                 return -1;
1009
1010         if (unlikely(!(dev->protocol_features &
1011             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1012                 return 0;
1013
1014         if (unlikely(vq_is_packed(dev)))
1015                 return -1;
1016
1017         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1018                 return -1;
1019
1020         vq = dev->virtqueue[vring_idx];
1021         if (unlikely(!vq))
1022                 return -1;
1023
1024         if (unlikely(!vq->inflight_split))
1025                 return -1;
1026
1027         if (unlikely(idx >= vq->size))
1028                 return -1;
1029
1030         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1031         vq->inflight_split->desc[idx].inflight = 1;
1032         return 0;
1033 }
1034
1035 int
1036 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1037                                    uint16_t head, uint16_t last,
1038                                    uint16_t *inflight_entry)
1039 {
1040         struct rte_vhost_inflight_info_packed *inflight_info;
1041         struct virtio_net *dev;
1042         struct vhost_virtqueue *vq;
1043         struct vring_packed_desc *desc;
1044         uint16_t old_free_head, free_head;
1045
1046         dev = get_device(vid);
1047         if (unlikely(!dev))
1048                 return -1;
1049
1050         if (unlikely(!(dev->protocol_features &
1051             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1052                 return 0;
1053
1054         if (unlikely(!vq_is_packed(dev)))
1055                 return -1;
1056
1057         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1058                 return -1;
1059
1060         vq = dev->virtqueue[vring_idx];
1061         if (unlikely(!vq))
1062                 return -1;
1063
1064         inflight_info = vq->inflight_packed;
1065         if (unlikely(!inflight_info))
1066                 return -1;
1067
1068         if (unlikely(head >= vq->size))
1069                 return -1;
1070
1071         desc = vq->desc_packed;
1072         old_free_head = inflight_info->old_free_head;
1073         if (unlikely(old_free_head >= vq->size))
1074                 return -1;
1075
1076         free_head = old_free_head;
1077
1078         /* init header descriptor */
1079         inflight_info->desc[old_free_head].num = 0;
1080         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1081         inflight_info->desc[old_free_head].inflight = 1;
1082
1083         /* save desc entry in flight entry */
1084         while (head != ((last + 1) % vq->size)) {
1085                 inflight_info->desc[old_free_head].num++;
1086                 inflight_info->desc[free_head].addr = desc[head].addr;
1087                 inflight_info->desc[free_head].len = desc[head].len;
1088                 inflight_info->desc[free_head].flags = desc[head].flags;
1089                 inflight_info->desc[free_head].id = desc[head].id;
1090
1091                 inflight_info->desc[old_free_head].last = free_head;
1092                 free_head = inflight_info->desc[free_head].next;
1093                 inflight_info->free_head = free_head;
1094                 head = (head + 1) % vq->size;
1095         }
1096
1097         inflight_info->old_free_head = free_head;
1098         *inflight_entry = old_free_head;
1099
1100         return 0;
1101 }
1102
1103 int
1104 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1105                                   uint16_t last_used_idx, uint16_t idx)
1106 {
1107         struct virtio_net *dev;
1108         struct vhost_virtqueue *vq;
1109
1110         dev = get_device(vid);
1111         if (unlikely(!dev))
1112                 return -1;
1113
1114         if (unlikely(!(dev->protocol_features &
1115             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1116                 return 0;
1117
1118         if (unlikely(vq_is_packed(dev)))
1119                 return -1;
1120
1121         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1122                 return -1;
1123
1124         vq = dev->virtqueue[vring_idx];
1125         if (unlikely(!vq))
1126                 return -1;
1127
1128         if (unlikely(!vq->inflight_split))
1129                 return -1;
1130
1131         if (unlikely(idx >= vq->size))
1132                 return -1;
1133
1134         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1135
1136         vq->inflight_split->desc[idx].inflight = 0;
1137
1138         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1139
1140         vq->inflight_split->used_idx = last_used_idx;
1141         return 0;
1142 }
1143
1144 int
1145 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1146                                    uint16_t head)
1147 {
1148         struct rte_vhost_inflight_info_packed *inflight_info;
1149         struct virtio_net *dev;
1150         struct vhost_virtqueue *vq;
1151
1152         dev = get_device(vid);
1153         if (unlikely(!dev))
1154                 return -1;
1155
1156         if (unlikely(!(dev->protocol_features &
1157             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1158                 return 0;
1159
1160         if (unlikely(!vq_is_packed(dev)))
1161                 return -1;
1162
1163         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1164                 return -1;
1165
1166         vq = dev->virtqueue[vring_idx];
1167         if (unlikely(!vq))
1168                 return -1;
1169
1170         inflight_info = vq->inflight_packed;
1171         if (unlikely(!inflight_info))
1172                 return -1;
1173
1174         if (unlikely(head >= vq->size))
1175                 return -1;
1176
1177         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1178
1179         inflight_info->desc[head].inflight = 0;
1180
1181         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1182
1183         inflight_info->old_free_head = inflight_info->free_head;
1184         inflight_info->old_used_idx = inflight_info->used_idx;
1185         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1186
1187         return 0;
1188 }
1189
1190 int
1191 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1192                                      uint16_t idx)
1193 {
1194         struct virtio_net *dev;
1195         struct vhost_virtqueue *vq;
1196
1197         dev = get_device(vid);
1198         if (unlikely(!dev))
1199                 return -1;
1200
1201         if (unlikely(!(dev->protocol_features &
1202             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1203                 return 0;
1204
1205         if (unlikely(vq_is_packed(dev)))
1206                 return -1;
1207
1208         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1209                 return -1;
1210
1211         vq = dev->virtqueue[vring_idx];
1212         if (unlikely(!vq))
1213                 return -1;
1214
1215         if (unlikely(!vq->inflight_split))
1216                 return -1;
1217
1218         vq->inflight_split->last_inflight_io = idx;
1219         return 0;
1220 }
1221
1222 int
1223 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1224                                       uint16_t head)
1225 {
1226         struct rte_vhost_inflight_info_packed *inflight_info;
1227         struct virtio_net *dev;
1228         struct vhost_virtqueue *vq;
1229         uint16_t last;
1230
1231         dev = get_device(vid);
1232         if (unlikely(!dev))
1233                 return -1;
1234
1235         if (unlikely(!(dev->protocol_features &
1236             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1237                 return 0;
1238
1239         if (unlikely(!vq_is_packed(dev)))
1240                 return -1;
1241
1242         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1243                 return -1;
1244
1245         vq = dev->virtqueue[vring_idx];
1246         if (unlikely(!vq))
1247                 return -1;
1248
1249         inflight_info = vq->inflight_packed;
1250         if (unlikely(!inflight_info))
1251                 return -1;
1252
1253         if (unlikely(head >= vq->size))
1254                 return -1;
1255
1256         last = inflight_info->desc[head].last;
1257         if (unlikely(last >= vq->size))
1258                 return -1;
1259
1260         inflight_info->desc[last].next = inflight_info->free_head;
1261         inflight_info->free_head = head;
1262         inflight_info->used_idx += inflight_info->desc[head].num;
1263         if (inflight_info->used_idx >= inflight_info->desc_num) {
1264                 inflight_info->used_idx -= inflight_info->desc_num;
1265                 inflight_info->used_wrap_counter =
1266                         !inflight_info->used_wrap_counter;
1267         }
1268
1269         return 0;
1270 }
1271
1272 int
1273 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1274 {
1275         struct virtio_net *dev;
1276         struct vhost_virtqueue *vq;
1277
1278         dev = get_device(vid);
1279         if (!dev)
1280                 return -1;
1281
1282         if (vring_idx >= VHOST_MAX_VRING)
1283                 return -1;
1284
1285         vq = dev->virtqueue[vring_idx];
1286         if (!vq)
1287                 return -1;
1288
1289         if (vq_is_packed(dev))
1290                 vhost_vring_call_packed(dev, vq);
1291         else
1292                 vhost_vring_call_split(dev, vq);
1293
1294         return 0;
1295 }
1296
1297 uint16_t
1298 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1299 {
1300         struct virtio_net *dev;
1301         struct vhost_virtqueue *vq;
1302         uint16_t ret = 0;
1303
1304         dev = get_device(vid);
1305         if (!dev)
1306                 return 0;
1307
1308         if (queue_id >= VHOST_MAX_VRING)
1309                 return 0;
1310
1311         vq = dev->virtqueue[queue_id];
1312         if (!vq)
1313                 return 0;
1314
1315         rte_spinlock_lock(&vq->access_lock);
1316
1317         if (unlikely(!vq->enabled || vq->avail == NULL))
1318                 goto out;
1319
1320         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1321
1322 out:
1323         rte_spinlock_unlock(&vq->access_lock);
1324         return ret;
1325 }
1326
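/*
 * Usage sketch (illustrative; queue_id and do_dequeue() are hypothetical):
 * a polling thread can use the count above to skip pointless dequeue
 * attempts:
 *
 *	if (rte_vhost_avail_entries(vid, queue_id) > 0)
 *		do_dequeue(vid, queue_id);
 */
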
1327 static inline int
1328 vhost_enable_notify_split(struct virtio_net *dev,
1329                 struct vhost_virtqueue *vq, int enable)
1330 {
1331         if (vq->used == NULL)
1332                 return -1;
1333
1334         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1335                 if (enable)
1336                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1337                 else
1338                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1339         } else {
1340                 if (enable)
1341                         vhost_avail_event(vq) = vq->last_avail_idx;
1342         }
1343         return 0;
1344 }
1345
1346 static inline int
1347 vhost_enable_notify_packed(struct virtio_net *dev,
1348                 struct vhost_virtqueue *vq, int enable)
1349 {
1350         uint16_t flags;
1351
1352         if (vq->device_event == NULL)
1353                 return -1;
1354
1355         if (!enable) {
1356                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1357                 return 0;
1358         }
1359
1360         flags = VRING_EVENT_F_ENABLE;
1361         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1362                 flags = VRING_EVENT_F_DESC;
1363                 vq->device_event->off_wrap = vq->last_avail_idx |
1364                         vq->avail_wrap_counter << 15;
1365         }
1366
1367         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1368
1369         vq->device_event->flags = flags;
1370         return 0;
1371 }
1372
1373 int
1374 vhost_enable_guest_notification(struct virtio_net *dev,
1375                 struct vhost_virtqueue *vq, int enable)
1376 {
1377         /*
1378          * If the virtqueue is not ready yet, the setting will be
1379          * applied when it becomes ready.
1380          */
1381         if (!vq->ready)
1382                 return 0;
1383
1384         if (vq_is_packed(dev))
1385                 return vhost_enable_notify_packed(dev, vq, enable);
1386         else
1387                 return vhost_enable_notify_split(dev, vq, enable);
1388 }
1389
1390 int
1391 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1392 {
1393         struct virtio_net *dev = get_device(vid);
1394         struct vhost_virtqueue *vq;
1395         int ret;
1396
1397         if (!dev)
1398                 return -1;
1399
1400         if (queue_id >= VHOST_MAX_VRING)
1401                 return -1;
1402
1403         vq = dev->virtqueue[queue_id];
1404         if (!vq)
1405                 return -1;
1406
1407         rte_spinlock_lock(&vq->access_lock);
1408
1409         vq->notif_enable = enable;
1410         ret = vhost_enable_guest_notification(dev, vq, enable);
1411
1412         rte_spinlock_unlock(&vq->access_lock);
1413
1414         return ret;
1415 }
1416
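/*
 * Usage sketch (illustrative; queue_id is hypothetical): busy-polling
 * applications typically suppress guest notifications while actively
 * polling and re-enable them right before going to sleep:
 *
 *	rte_vhost_enable_guest_notification(vid, queue_id, 0);
 *	... poll the ring ...
 *	rte_vhost_enable_guest_notification(vid, queue_id, 1);
 */
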
1417 void
1418 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1419 {
1420         struct virtio_net *dev = get_device(vid);
1421
1422         if (dev == NULL)
1423                 return;
1424
1425         vhost_log_write(dev, addr, len);
1426 }
1427
1428 void
1429 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1430                          uint64_t offset, uint64_t len)
1431 {
1432         struct virtio_net *dev;
1433         struct vhost_virtqueue *vq;
1434
1435         dev = get_device(vid);
1436         if (dev == NULL)
1437                 return;
1438
1439         if (vring_idx >= VHOST_MAX_VRING)
1440                 return;
1441         vq = dev->virtqueue[vring_idx];
1442         if (!vq)
1443                 return;
1444
1445         vhost_log_used_vring(dev, vq, offset, len);
1446 }
1447
1448 uint32_t
1449 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1450 {
1451         struct virtio_net *dev;
1452         struct vhost_virtqueue *vq;
1453         uint32_t ret = 0;
1454
1455         dev = get_device(vid);
1456         if (dev == NULL)
1457                 return 0;
1458
1459         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1460                 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1461                         dev->vid, __func__, qid);
1462                 return 0;
1463         }
1464
1465         vq = dev->virtqueue[qid];
1466         if (vq == NULL)
1467                 return 0;
1468
1469         rte_spinlock_lock(&vq->access_lock);
1470
1471         if (unlikely(!vq->enabled || vq->avail == NULL))
1472                 goto out;
1473
1474         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1475
1476 out:
1477         rte_spinlock_unlock(&vq->access_lock);
1478         return ret;
1479 }
1480
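/*
 * Usage note with an example (illustrative): the check above only accepts
 * odd virtqueue indexes, i.e. the guest TX rings this function counts
 * from. For virtio-net queue pair N, pass qid = 2 * N + 1 (qid = 1 for the
 * first pair).
 */
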
1481 struct rte_vdpa_device *
1482 rte_vhost_get_vdpa_device(int vid)
1483 {
1484         struct virtio_net *dev = get_device(vid);
1485
1486         if (dev == NULL)
1487                 return NULL;
1488
1489         return dev->vdpa_dev;
1490 }
1491
1492 int rte_vhost_get_log_base(int vid, uint64_t *log_base,
1493                 uint64_t *log_size)
1494 {
1495         struct virtio_net *dev = get_device(vid);
1496
1497         if (dev == NULL || log_base == NULL || log_size == NULL)
1498                 return -1;
1499
1500         *log_base = dev->log_base;
1501         *log_size = dev->log_size;
1502
1503         return 0;
1504 }
1505
1506 int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1507                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1508 {
1509         struct vhost_virtqueue *vq;
1510         struct virtio_net *dev = get_device(vid);
1511
1512         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1513                 return -1;
1514
1515         if (queue_id >= VHOST_MAX_VRING)
1516                 return -1;
1517
1518         vq = dev->virtqueue[queue_id];
1519         if (!vq)
1520                 return -1;
1521
1522         if (vq_is_packed(dev)) {
1523                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1524                                   vq->last_avail_idx;
1525                 *last_used_idx = (vq->used_wrap_counter << 15) |
1526                                  vq->last_used_idx;
1527         } else {
1528                 *last_avail_idx = vq->last_avail_idx;
1529                 *last_used_idx = vq->last_used_idx;
1530         }
1531
1532         return 0;
1533 }
1534
1535 int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1536                 uint16_t last_avail_idx, uint16_t last_used_idx)
1537 {
1538         struct vhost_virtqueue *vq;
1539         struct virtio_net *dev = get_device(vid);
1540
1541         if (!dev)
1542                 return -1;
1543
1544         if (queue_id >= VHOST_MAX_VRING)
1545                 return -1;
1546
1547         vq = dev->virtqueue[queue_id];
1548         if (!vq)
1549                 return -1;
1550
1551         if (vq_is_packed(dev)) {
1552                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1553                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1554                 vq->last_used_idx = last_used_idx & 0x7fff;
1555                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1556         } else {
1557                 vq->last_avail_idx = last_avail_idx;
1558                 vq->last_used_idx = last_used_idx;
1559         }
1560
1561         return 0;
1562 }
1563
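/*
 * Encoding example (illustrative): for packed rings, bit 15 of each saved
 * index carries the wrap counter. Passing last_avail_idx = 0x8005 therefore
 * restores last_avail_idx = 5 with avail_wrap_counter = 1, which is exactly
 * the value rte_vhost_get_vring_base() returns for that state.
 */
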
1564 int
1565 rte_vhost_get_vring_base_from_inflight(int vid,
1566                                        uint16_t queue_id,
1567                                        uint16_t *last_avail_idx,
1568                                        uint16_t *last_used_idx)
1569 {
1570         struct rte_vhost_inflight_info_packed *inflight_info;
1571         struct vhost_virtqueue *vq;
1572         struct virtio_net *dev = get_device(vid);
1573
1574         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1575                 return -1;
1576
1577         if (queue_id >= VHOST_MAX_VRING)
1578                 return -1;
1579
1580         vq = dev->virtqueue[queue_id];
1581         if (!vq)
1582                 return -1;
1583
1584         if (!vq_is_packed(dev))
1585                 return -1;
1586
1587         inflight_info = vq->inflight_packed;
1588         if (!inflight_info)
1589                 return -1;
1590
1591         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1592                           inflight_info->old_used_idx;
1593         *last_used_idx = *last_avail_idx;
1594
1595         return 0;
1596 }
1597
1598 int rte_vhost_extern_callback_register(int vid,
1599                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1600 {
1601         struct virtio_net *dev = get_device(vid);
1602
1603         if (dev == NULL || ops == NULL)
1604                 return -1;
1605
1606         dev->extern_ops = *ops;
1607         dev->extern_data = ctx;
1608         return 0;
1609 }
1610
1611 int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1612                                         uint32_t features,
1613                                         struct rte_vhost_async_channel_ops *ops)
1614 {
1615         struct vhost_virtqueue *vq;
1616         struct virtio_net *dev = get_device(vid);
1617         struct rte_vhost_async_features f;
1618         int node;
1619
1620         if (dev == NULL || ops == NULL)
1621                 return -1;
1622
1623         f.intval = features;
1624
1625         if (queue_id >= VHOST_MAX_VRING)
1626                 return -1;
1627
1628         vq = dev->virtqueue[queue_id];
1629
1630         if (unlikely(vq == NULL || !dev->async_copy))
1631                 return -1;
1632
1633         if (unlikely(!f.async_inorder)) {
1634                 VHOST_LOG_CONFIG(ERR,
1635                         "async copy is not supported in non-inorder mode "
1636                         "(vid %d, qid: %d)\n", vid, queue_id);
1637                 return -1;
1638         }
1639
1640         if (unlikely(ops->check_completed_copies == NULL ||
1641                 ops->transfer_data == NULL))
1642                 return -1;
1643
1644         rte_spinlock_lock(&vq->access_lock);
1645
1646         if (unlikely(vq->async_registered)) {
1647                 VHOST_LOG_CONFIG(ERR,
1648                         "async register failed: channel already registered "
1649                         "(vid %d, qid: %d)\n", vid, queue_id);
1650                 goto reg_out;
1651         }
1652
1653 #ifdef RTE_LIBRTE_VHOST_NUMA
1654         if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
1655                 VHOST_LOG_CONFIG(ERR,
1656                         "unable to get numa information in async register. "
1657                         "allocating async buffer memory on the caller thread node\n");
1658                 node = SOCKET_ID_ANY;
1659         }
1660 #else
1661         node = SOCKET_ID_ANY;
1662 #endif
1663
1664         vq->async_pkts_info = rte_malloc_socket(NULL,
1665                         vq->size * sizeof(struct async_inflight_info),
1666                         RTE_CACHE_LINE_SIZE, node);
1667         if (!vq->async_pkts_info) {
1668                 vhost_free_async_mem(vq);
1669                 VHOST_LOG_CONFIG(ERR,
1670                         "async register failed: cannot allocate memory for async_pkts_info "
1671                         "(vid %d, qid: %d)\n", vid, queue_id);
1672                 goto reg_out;
1673         }
1674
1675         vq->it_pool = rte_malloc_socket(NULL,
1676                         VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
1677                         RTE_CACHE_LINE_SIZE, node);
1678         if (!vq->it_pool) {
1679                 vhost_free_async_mem(vq);
1680                 VHOST_LOG_CONFIG(ERR,
1681                         "async register failed: cannot allocate memory for it_pool "
1682                         "(vid %d, qid: %d)\n", vid, queue_id);
1683                 goto reg_out;
1684         }
1685
1686         vq->vec_pool = rte_malloc_socket(NULL,
1687                         VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
1688                         RTE_CACHE_LINE_SIZE, node);
1689         if (!vq->vec_pool) {
1690                 vhost_free_async_mem(vq);
1691                 VHOST_LOG_CONFIG(ERR,
1692                         "async register failed: cannot allocate memory for vec_pool "
1693                         "(vid %d, qid: %d)\n", vid, queue_id);
1694                 goto reg_out;
1695         }
1696
1697         if (vq_is_packed(dev)) {
1698                 vq->async_buffers_packed = rte_malloc_socket(NULL,
1699                         vq->size * sizeof(struct vring_used_elem_packed),
1700                         RTE_CACHE_LINE_SIZE, node);
1701                 if (!vq->async_buffers_packed) {
1702                         vhost_free_async_mem(vq);
1703                         VHOST_LOG_CONFIG(ERR,
1704                                 "async register failed: cannot allocate memory for async buffers "
1705                                 "(vid %d, qid: %d)\n", vid, queue_id);
1706                         goto reg_out;
1707                 }
1708         } else {
1709                 vq->async_descs_split = rte_malloc_socket(NULL,
1710                         vq->size * sizeof(struct vring_used_elem),
1711                         RTE_CACHE_LINE_SIZE, node);
1712                 if (!vq->async_descs_split) {
1713                         vhost_free_async_mem(vq);
1714                         VHOST_LOG_CONFIG(ERR,
1715                                 "async register failed: cannot allocate memory for async descs "
1716                                 "(vid %d, qid: %d)\n", vid, queue_id);
1717                         goto reg_out;
1718                 }
1719         }
1720
1721         vq->async_ops.check_completed_copies = ops->check_completed_copies;
1722         vq->async_ops.transfer_data = ops->transfer_data;
1723
1724         vq->async_inorder = f.async_inorder;
1725         vq->async_threshold = f.async_threshold;
1726
1727         vq->async_registered = true;
1728
1729 reg_out:
1730         rte_spinlock_unlock(&vq->access_lock);
1731
1732         return 0;
1733 }
1734
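/*
 * Registration sketch (illustrative; my_dma_transfer(), my_dma_poll(),
 * queue_id and handle_error() are made-up, and the device is assumed to
 * have been registered with async copy enabled). The exact bit layout of
 * the feature word is left to struct rte_vhost_async_features:
 *
 *	struct rte_vhost_async_channel_ops ops = {
 *		.transfer_data = my_dma_transfer,
 *		.check_completed_copies = my_dma_poll,
 *	};
 *	struct rte_vhost_async_features f = { 0 };
 *
 *	f.async_inorder = 1;
 *	f.async_threshold = 256;
 *
 *	if (rte_vhost_async_channel_register(vid, queue_id, f.intval, &ops) < 0)
 *		handle_error();
 */
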
1735 int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1736 {
1737         struct vhost_virtqueue *vq;
1738         struct virtio_net *dev = get_device(vid);
1739         int ret = -1;
1740
1741         if (dev == NULL)
1742                 return ret;
1743
1744         if (queue_id >= VHOST_MAX_VRING)
1745                 return ret;
1746
1747         vq = dev->virtqueue[queue_id];
1748
1749         if (vq == NULL)
1750                 return ret;
1751
1752         ret = 0;
1753
1754         if (!vq->async_registered)
1755                 return ret;
1756
1757         if (!rte_spinlock_trylock(&vq->access_lock)) {
1758                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1759                         "virt queue busy.\n");
1760                 return -1;
1761         }
1762
1763         if (vq->async_pkts_inflight_n) {
1764                 VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1765                         "async inflight packets must be completed before unregistration.\n");
1766                 ret = -1;
1767                 goto out;
1768         }
1769
1770         vhost_free_async_mem(vq);
1771
1772         vq->async_ops.transfer_data = NULL;
1773         vq->async_ops.check_completed_copies = NULL;
1774         vq->async_registered = false;
1775
1776 out:
1777         rte_spinlock_unlock(&vq->access_lock);
1778
1779         return ret;
1780 }
1781
1782 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1783 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);