dpdk.git: lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34                     uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36         uint64_t vva, tmp_size;
37
38         if (unlikely(!*size))
39                 return 0;
40
41         tmp_size = *size;
42
43         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44         if (tmp_size == *size)
45                 return vva;
46
47         iova += tmp_size;
48
49         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50                 /*
51                  * iotlb_lock is read-locked for a full burst,
52                  * but it only protects the iotlb cache.
53                  * In case of IOTLB miss, we might block on the socket,
54                  * which could cause a deadlock with QEMU if an IOTLB update
55                  * is being handled. We can safely unlock here to avoid it.
56                  */
57                 vhost_user_iotlb_rd_unlock(vq);
58
59                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
60                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
61                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
62                                 dev->ifname, iova);
63                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
64                 }
65
66                 vhost_user_iotlb_rd_lock(vq);
67         }
68
69         return 0;
70 }
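/*
 * Illustrative caller pattern (a sketch, not part of the upstream code):
 * a translation may cover only part of the requested range, so data paths
 * typically walk the buffer chunk by chunk, bailing out on a miss and
 * retrying once the corresponding IOTLB update has been received:
 *
 *     uint64_t off = 0;
 *     while (off < len) {
 *             uint64_t chunk = len - off;
 *             uint64_t vva = vhost_iova_to_vva(dev, vq, iova + off,
 *                             &chunk, VHOST_ACCESS_RO);
 *             if (!vva || !chunk)
 *                     return -1;      translation miss, retry later
 *             off += chunk;
 *     }
 */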
71
72 #define VHOST_LOG_PAGE  4096
73
74 /*
75  * Atomically set a bit in memory.
76  */
77 static __rte_always_inline void
78 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
79 {
80 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
81         /*
82          * The __sync_ built-ins are deprecated, but the __atomic_ ones
83          * generate sub-optimal code in older GCC versions.
84          */
85         __sync_fetch_and_or_1(addr, (1U << nr));
86 #else
87         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
88 #endif
89 }
90
91 static __rte_always_inline void
92 vhost_log_page(uint8_t *log_base, uint64_t page)
93 {
94         vhost_set_bit(page % 8, &log_base[page / 8]);
95 }
96
97 void
98 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
99 {
100         uint64_t page;
101
102         if (unlikely(!dev->log_base || !len))
103                 return;
104
105         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
106                 return;
107
108         /* To make sure guest memory updates are committed before logging */
109         rte_atomic_thread_fence(__ATOMIC_RELEASE);
110
111         page = addr / VHOST_LOG_PAGE;
112         while (page * VHOST_LOG_PAGE < addr + len) {
113                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
114                 page += 1;
115         }
116 }
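/*
 * Worked example of the dirty log layout (one bit per 4 KiB page):
 * for addr = 0x12345, page = 0x12345 / 4096 = 18, so the loop above sets
 * bit 18 % 8 = 2 of byte 18 / 8 = 2 in the log region.  The bound check
 * ensures that this byte index stays below dev->log_size.
 */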
117
118 void
119 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
120                              uint64_t iova, uint64_t len)
121 {
122         uint64_t hva, gpa, map_len;
123         map_len = len;
124
125         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
126         if (map_len != len) {
127                 VHOST_LOG_DATA(ERR,
128                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
129                         dev->ifname, iova);
130                 return;
131         }
132
133         gpa = hva_to_gpa(dev, hva, len);
134         if (gpa)
135                 __vhost_log_write(dev, gpa, len);
136 }
137
138 void
139 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
140 {
141         unsigned long *log_base;
142         int i;
143
144         if (unlikely(!dev->log_base))
145                 return;
146
147         /* No cache, nothing to sync */
148         if (unlikely(!vq->log_cache))
149                 return;
150
151         rte_atomic_thread_fence(__ATOMIC_RELEASE);
152
153         log_base = (unsigned long *)(uintptr_t)dev->log_base;
154
155         for (i = 0; i < vq->log_cache_nb_elem; i++) {
156                 struct log_cache_entry *elem = vq->log_cache + i;
157
158 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
159                 /*
160                  * The '__sync' builtins are deprecated, but the '__atomic' ones
161                  * generate sub-optimal code in older GCC versions.
162                  */
163                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
164 #else
165                 __atomic_fetch_or(log_base + elem->offset, elem->val,
166                                 __ATOMIC_RELAXED);
167 #endif
168         }
169
170         rte_atomic_thread_fence(__ATOMIC_RELEASE);
171
172         vq->log_cache_nb_elem = 0;
173 }
174
175 static __rte_always_inline void
176 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
177                         uint64_t page)
178 {
179         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
180         uint32_t offset = page / (sizeof(unsigned long) << 3);
181         int i;
182
183         if (unlikely(!vq->log_cache)) {
184                 /* No logging cache allocated, write dirty log map directly */
185                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
186                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
187
188                 return;
189         }
190
191         for (i = 0; i < vq->log_cache_nb_elem; i++) {
192                 struct log_cache_entry *elem = vq->log_cache + i;
193
194                 if (elem->offset == offset) {
195                         elem->val |= (1UL << bit_nr);
196                         return;
197                 }
198         }
199
200         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
201                 /*
202                  * No more room for a new log cache entry,
203                  * so write the dirty log map directly.
204                  */
205                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
206                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
207
208                 return;
209         }
210
211         vq->log_cache[i].offset = offset;
212         vq->log_cache[i].val = (1UL << bit_nr);
213         vq->log_cache_nb_elem++;
214 }
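/*
 * The cache batches dirty bits per 'unsigned long' word of the log: on a
 * 64-bit build, page 18 lands in word 18 / 64 = 0 with bit 18 set.
 * __vhost_log_cache_sync() later ORs each cached word into the shared log
 * with a single atomic operation, instead of one atomic per dirty page.
 */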
215
216 void
217 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
218                         uint64_t addr, uint64_t len)
219 {
220         uint64_t page;
221
222         if (unlikely(!dev->log_base || !len))
223                 return;
224
225         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
226                 return;
227
228         page = addr / VHOST_LOG_PAGE;
229         while (page * VHOST_LOG_PAGE < addr + len) {
230                 vhost_log_cache_page(dev, vq, page);
231                 page += 1;
232         }
233 }
234
235 void
236 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
237                              uint64_t iova, uint64_t len)
238 {
239         uint64_t hva, gpa, map_len;
240         map_len = len;
241
242         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
243         if (map_len != len) {
244                 VHOST_LOG_DATA(ERR,
245                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
246                         dev->ifname, iova);
247                 return;
248         }
249
250         gpa = hva_to_gpa(dev, hva, len);
251         if (gpa)
252                 __vhost_log_cache_write(dev, vq, gpa, len);
253 }
254
255 void *
256 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
257                 uint64_t desc_addr, uint64_t desc_len)
258 {
259         void *idesc;
260         uint64_t src, dst;
261         uint64_t len, remain = desc_len;
262
263         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
264         if (unlikely(!idesc))
265                 return NULL;
266
267         dst = (uint64_t)(uintptr_t)idesc;
268
269         while (remain) {
270                 len = remain;
271                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
272                                 VHOST_ACCESS_RO);
273                 if (unlikely(!src || !len)) {
274                         rte_free(idesc);
275                         return NULL;
276                 }
277
278                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
279
280                 remain -= len;
281                 dst += len;
282                 desc_addr += len;
283         }
284
285         return idesc;
286 }
287
288 void
289 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
290 {
291         if ((vq->callfd >= 0) && (destroy != 0))
292                 close(vq->callfd);
293         if (vq->kickfd >= 0)
294                 close(vq->kickfd);
295 }
296
297 void
298 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
299 {
300         if (!(dev->protocol_features &
301             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
302                 return;
303
304         if (vq_is_packed(dev)) {
305                 if (vq->inflight_packed)
306                         vq->inflight_packed = NULL;
307         } else {
308                 if (vq->inflight_split)
309                         vq->inflight_split = NULL;
310         }
311
312         if (vq->resubmit_inflight) {
313                 if (vq->resubmit_inflight->resubmit_list) {
314                         rte_free(vq->resubmit_inflight->resubmit_list);
315                         vq->resubmit_inflight->resubmit_list = NULL;
316                 }
317                 rte_free(vq->resubmit_inflight);
318                 vq->resubmit_inflight = NULL;
319         }
320 }
321
322 /*
323  * Unmap any memory, close any file descriptors and
324  * free any memory owned by a device.
325  */
326 void
327 cleanup_device(struct virtio_net *dev, int destroy)
328 {
329         uint32_t i;
330
331         vhost_backend_cleanup(dev);
332
333         for (i = 0; i < dev->nr_vring; i++) {
334                 cleanup_vq(dev->virtqueue[i], destroy);
335                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
336         }
337 }
338
339 static void
340 vhost_free_async_mem(struct vhost_virtqueue *vq)
341 {
342         if (!vq->async)
343                 return;
344
345         rte_free(vq->async->pkts_info);
346
347         rte_free(vq->async->buffers_packed);
348         vq->async->buffers_packed = NULL;
349         rte_free(vq->async->descs_split);
350         vq->async->descs_split = NULL;
351
352         rte_free(vq->async);
353         vq->async = NULL;
354 }
355
356 void
357 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
358 {
359         if (vq_is_packed(dev))
360                 rte_free(vq->shadow_used_packed);
361         else
362                 rte_free(vq->shadow_used_split);
363
364         vhost_free_async_mem(vq);
365         rte_free(vq->batch_copy_elems);
366         rte_mempool_free(vq->iotlb_pool);
367         rte_free(vq->log_cache);
368         rte_free(vq);
369 }
370
371 /*
372  * Release virtqueues and device memory.
373  */
374 static void
375 free_device(struct virtio_net *dev)
376 {
377         uint32_t i;
378
379         for (i = 0; i < dev->nr_vring; i++)
380                 free_vq(dev, dev->virtqueue[i]);
381
382         rte_free(dev);
383 }
384
385 static __rte_always_inline int
386 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
387 {
388         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
389                 return 0;
390
391         vq->log_guest_addr = translate_log_addr(dev, vq,
392                                                 vq->ring_addrs.log_guest_addr);
393         if (vq->log_guest_addr == 0)
394                 return -1;
395
396         return 0;
397 }
398
399 /*
400  * Converts the vring log address to a GPA.
401  * If the IOMMU is enabled, the log address is an IOVA.
402  * If the IOMMU is not enabled, the log address is already a GPA.
403  *
404  * Caller should have iotlb_lock read-locked
405  */
406 uint64_t
407 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
408                 uint64_t log_addr)
409 {
410         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
411                 const uint64_t exp_size = sizeof(uint64_t);
412                 uint64_t hva, gpa;
413                 uint64_t size = exp_size;
414
415                 hva = vhost_iova_to_vva(dev, vq, log_addr,
416                                         &size, VHOST_ACCESS_RW);
417
418                 if (size != exp_size)
419                         return 0;
420
421                 gpa = hva_to_gpa(dev, hva, exp_size);
422                 if (!gpa) {
423                         VHOST_LOG_DATA(ERR,
424                                 "(%s) failed to find GPA for log_addr: 0x%"
425                                 PRIx64 " hva: 0x%" PRIx64 "\n",
426                                 dev->ifname, log_addr, hva);
427                         return 0;
428                 }
429                 return gpa;
430
431         } else
432                 return log_addr;
433 }
434
435 /* Caller should have iotlb_lock read-locked */
436 static int
437 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
438 {
439         uint64_t req_size, size;
440
441         req_size = sizeof(struct vring_desc) * vq->size;
442         size = req_size;
443         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
444                                                 vq->ring_addrs.desc_user_addr,
445                                                 &size, VHOST_ACCESS_RW);
446         if (!vq->desc || size != req_size)
447                 return -1;
448
449         req_size = sizeof(struct vring_avail);
450         req_size += sizeof(uint16_t) * vq->size;
451         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
452                 req_size += sizeof(uint16_t);
453         size = req_size;
454         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
455                                                 vq->ring_addrs.avail_user_addr,
456                                                 &size, VHOST_ACCESS_RW);
457         if (!vq->avail || size != req_size)
458                 return -1;
459
460         req_size = sizeof(struct vring_used);
461         req_size += sizeof(struct vring_used_elem) * vq->size;
462         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
463                 req_size += sizeof(uint16_t);
464         size = req_size;
465         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
466                                                 vq->ring_addrs.used_user_addr,
467                                                 &size, VHOST_ACCESS_RW);
468         if (!vq->used || size != req_size)
469                 return -1;
470
471         return 0;
472 }
473
474 /* Caller should have iotlb_lock read-locked */
475 static int
476 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
477 {
478         uint64_t req_size, size;
479
480         req_size = sizeof(struct vring_packed_desc) * vq->size;
481         size = req_size;
482         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
483                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
484                                 &size, VHOST_ACCESS_RW);
485         if (!vq->desc_packed || size != req_size)
486                 return -1;
487
488         req_size = sizeof(struct vring_packed_desc_event);
489         size = req_size;
490         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
491                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
492                                 &size, VHOST_ACCESS_RW);
493         if (!vq->driver_event || size != req_size)
494                 return -1;
495
496         req_size = sizeof(struct vring_packed_desc_event);
497         size = req_size;
498         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
499                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
500                                 &size, VHOST_ACCESS_RW);
501         if (!vq->device_event || size != req_size)
502                 return -1;
503
504         return 0;
505 }
506
507 int
508 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
509 {
510
511         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
512                 return -1;
513
514         if (vq_is_packed(dev)) {
515                 if (vring_translate_packed(dev, vq) < 0)
516                         return -1;
517         } else {
518                 if (vring_translate_split(dev, vq) < 0)
519                         return -1;
520         }
521
522         if (log_translate(dev, vq) < 0)
523                 return -1;
524
525         vq->access_ok = true;
526
527         return 0;
528 }
529
530 void
531 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
532 {
533         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
534                 vhost_user_iotlb_wr_lock(vq);
535
536         vq->access_ok = false;
537         vq->desc = NULL;
538         vq->avail = NULL;
539         vq->used = NULL;
540         vq->log_guest_addr = 0;
541
542         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
543                 vhost_user_iotlb_wr_unlock(vq);
544 }
545
546 static void
547 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
548 {
549         struct vhost_virtqueue *vq;
550         int numa_node = SOCKET_ID_ANY;
551
552         if (vring_idx >= VHOST_MAX_VRING) {
553                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
554                                 dev->ifname, vring_idx);
555                 return;
556         }
557
558         vq = dev->virtqueue[vring_idx];
559         if (!vq) {
560                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
561                                 dev->ifname, vring_idx);
562                 return;
563         }
564
565         memset(vq, 0, sizeof(struct vhost_virtqueue));
566
567         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
568         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
569         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
570
571 #ifdef RTE_LIBRTE_VHOST_NUMA
572         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
573                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
574                         dev->ifname, rte_strerror(errno));
575                 numa_node = SOCKET_ID_ANY;
576         }
577 #endif
578         vq->numa_node = numa_node;
579
580         vhost_user_iotlb_init(dev, vring_idx);
581 }
582
583 static void
584 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
585 {
586         struct vhost_virtqueue *vq;
587         int callfd;
588
589         if (vring_idx >= VHOST_MAX_VRING) {
590                 VHOST_LOG_CONFIG(ERR,
591                                 "(%s) failed to reset vring, out of bound (%d)\n",
592                                 dev->ifname, vring_idx);
593                 return;
594         }
595
596         vq = dev->virtqueue[vring_idx];
597         if (!vq) {
598                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
599                                 dev->ifname, vring_idx);
600                 return;
601         }
602
603         callfd = vq->callfd;
604         init_vring_queue(dev, vring_idx);
605         vq->callfd = callfd;
606 }
607
608 int
609 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
610 {
611         struct vhost_virtqueue *vq;
612         uint32_t i;
613
614         /* Also allocate holes, if any, up to requested vring index. */
615         for (i = 0; i <= vring_idx; i++) {
616                 if (dev->virtqueue[i])
617                         continue;
618
619                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
620                 if (vq == NULL) {
621                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
622                                         dev->ifname, i);
623                         return -1;
624                 }
625
626                 dev->virtqueue[i] = vq;
627                 init_vring_queue(dev, i);
628                 rte_spinlock_init(&vq->access_lock);
629                 vq->avail_wrap_counter = 1;
630                 vq->used_wrap_counter = 1;
631                 vq->signalled_used_valid = false;
632         }
633
634         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
635
636         return 0;
637 }
638
639 /*
640  * Reset some variables in the device structure, while keeping a few
641  * others untouched, such as vid, ifname and nr_vring: they
642  * should remain the same unless the device is removed.
643  */
644 void
645 reset_device(struct virtio_net *dev)
646 {
647         uint32_t i;
648
649         dev->features = 0;
650         dev->protocol_features = 0;
651         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
652
653         for (i = 0; i < dev->nr_vring; i++)
654                 reset_vring_queue(dev, i);
655 }
656
657 /*
658  * Invoked when a new vhost-user connection is established (when
659  * a new virtio device is being attached).
660  */
661 int
662 vhost_new_device(void)
663 {
664         struct virtio_net *dev;
665         int i;
666
667         pthread_mutex_lock(&vhost_dev_lock);
668         for (i = 0; i < MAX_VHOST_DEVICE; i++) {
669                 if (vhost_devices[i] == NULL)
670                         break;
671         }
672
673         if (i == MAX_VHOST_DEVICE) {
674                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
675                 pthread_mutex_unlock(&vhost_dev_lock);
676                 return -1;
677         }
678
679         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
680         if (dev == NULL) {
681                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
682                 pthread_mutex_unlock(&vhost_dev_lock);
683                 return -1;
684         }
685
686         vhost_devices[i] = dev;
687         pthread_mutex_unlock(&vhost_dev_lock);
688
689         dev->vid = i;
690         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
691         dev->slave_req_fd = -1;
692         dev->postcopy_ufd = -1;
693         rte_spinlock_init(&dev->slave_req_lock);
694
695         return i;
696 }
697
698 void
699 vhost_destroy_device_notify(struct virtio_net *dev)
700 {
701         struct rte_vdpa_device *vdpa_dev;
702
703         if (dev->flags & VIRTIO_DEV_RUNNING) {
704                 vdpa_dev = dev->vdpa_dev;
705                 if (vdpa_dev)
706                         vdpa_dev->ops->dev_close(dev->vid);
707                 dev->flags &= ~VIRTIO_DEV_RUNNING;
708                 dev->notify_ops->destroy_device(dev->vid);
709         }
710 }
711
712 /*
713  * Invoked when the vhost-user connection is broken (when
714  * the virtio device is being detached).
715  */
716 void
717 vhost_destroy_device(int vid)
718 {
719         struct virtio_net *dev = get_device(vid);
720
721         if (dev == NULL)
722                 return;
723
724         vhost_destroy_device_notify(dev);
725
726         cleanup_device(dev, 1);
727         free_device(dev);
728
729         vhost_devices[vid] = NULL;
730 }
731
732 void
733 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
734 {
735         struct virtio_net *dev = get_device(vid);
736
737         if (dev == NULL)
738                 return;
739
740         dev->vdpa_dev = vdpa_dev;
741 }
742
743 void
744 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
745 {
746         struct virtio_net *dev;
747         unsigned int len;
748
749         dev = get_device(vid);
750         if (dev == NULL)
751                 return;
752
753         len = if_len > sizeof(dev->ifname) ?
754                 sizeof(dev->ifname) : if_len;
755
756         strncpy(dev->ifname, if_name, len);
757         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
758 }
759
760 void
761 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
762 {
763         struct virtio_net *dev = get_device(vid);
764
765         if (dev == NULL)
766                 return;
767
768         if (enable)
769                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
770         else
771                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
772         if (!compliant_ol_flags)
773                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
774         else
775                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
776 }
777
778 void
779 vhost_enable_extbuf(int vid)
780 {
781         struct virtio_net *dev = get_device(vid);
782
783         if (dev == NULL)
784                 return;
785
786         dev->extbuf = 1;
787 }
788
789 void
790 vhost_enable_linearbuf(int vid)
791 {
792         struct virtio_net *dev = get_device(vid);
793
794         if (dev == NULL)
795                 return;
796
797         dev->linearbuf = 1;
798 }
799
800 int
801 rte_vhost_get_mtu(int vid, uint16_t *mtu)
802 {
803         struct virtio_net *dev = get_device(vid);
804
805         if (dev == NULL || mtu == NULL)
806                 return -ENODEV;
807
808         if (!(dev->flags & VIRTIO_DEV_READY))
809                 return -EAGAIN;
810
811         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
812                 return -ENOTSUP;
813
814         *mtu = dev->mtu;
815
816         return 0;
817 }
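/*
 * Usage sketch (illustrative only): an application typically queries the
 * MTU from its new_device() callback, once the device is ready, and falls
 * back to a default when VIRTIO_NET_F_MTU was not negotiated:
 *
 *     uint16_t mtu;
 *
 *     if (rte_vhost_get_mtu(vid, &mtu) != 0)
 *             mtu = 1500;     assumed application default
 */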
818
819 int
820 rte_vhost_get_numa_node(int vid)
821 {
822 #ifdef RTE_LIBRTE_VHOST_NUMA
823         struct virtio_net *dev = get_device(vid);
824         int numa_node;
825         int ret;
826
827         if (dev == NULL || numa_available() != 0)
828                 return -1;
829
830         ret = get_mempolicy(&numa_node, NULL, 0, dev,
831                             MPOL_F_NODE | MPOL_F_ADDR);
832         if (ret < 0) {
833                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
834                         dev->ifname, rte_strerror(errno));
835                 return -1;
836         }
837
838         return numa_node;
839 #else
840         RTE_SET_USED(vid);
841         return -1;
842 #endif
843 }
844
845 uint32_t
846 rte_vhost_get_queue_num(int vid)
847 {
848         struct virtio_net *dev = get_device(vid);
849
850         if (dev == NULL)
851                 return 0;
852
853         return dev->nr_vring / 2;
854 }
855
856 uint16_t
857 rte_vhost_get_vring_num(int vid)
858 {
859         struct virtio_net *dev = get_device(vid);
860
861         if (dev == NULL)
862                 return 0;
863
864         return dev->nr_vring;
865 }
866
867 int
868 rte_vhost_get_ifname(int vid, char *buf, size_t len)
869 {
870         struct virtio_net *dev = get_device(vid);
871
872         if (dev == NULL || buf == NULL)
873                 return -1;
874
875         len = RTE_MIN(len, sizeof(dev->ifname));
876
877         strncpy(buf, dev->ifname, len);
878         buf[len - 1] = '\0';
879
880         return 0;
881 }
882
883 int
884 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
885 {
886         struct virtio_net *dev;
887
888         dev = get_device(vid);
889         if (dev == NULL || features == NULL)
890                 return -1;
891
892         *features = dev->features;
893         return 0;
894 }
895
896 int
897 rte_vhost_get_negotiated_protocol_features(int vid,
898                                            uint64_t *protocol_features)
899 {
900         struct virtio_net *dev;
901
902         dev = get_device(vid);
903         if (dev == NULL || protocol_features == NULL)
904                 return -1;
905
906         *protocol_features = dev->protocol_features;
907         return 0;
908 }
909
910 int
911 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
912 {
913         struct virtio_net *dev;
914         struct rte_vhost_memory *m;
915         size_t size;
916
917         dev = get_device(vid);
918         if (dev == NULL || mem == NULL)
919                 return -1;
920
921         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
922         m = malloc(sizeof(struct rte_vhost_memory) + size);
923         if (!m)
924                 return -1;
925
926         m->nregions = dev->mem->nregions;
927         memcpy(m->regions, dev->mem->regions, size);
928         *mem = m;
929
930         return 0;
931 }
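/*
 * Usage sketch (illustrative only).  The returned table is allocated with
 * malloc() above, so the caller releases it with free():
 *
 *     struct rte_vhost_memory *mem = NULL;
 *     uint32_t i;
 *
 *     if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *             for (i = 0; i < mem->nregions; i++)
 *                     inspect mem->regions[i].guest_phys_addr and .size
 *             free(mem);
 *     }
 */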
932
933 int
934 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
935                           struct rte_vhost_vring *vring)
936 {
937         struct virtio_net *dev;
938         struct vhost_virtqueue *vq;
939
940         dev = get_device(vid);
941         if (dev == NULL || vring == NULL)
942                 return -1;
943
944         if (vring_idx >= VHOST_MAX_VRING)
945                 return -1;
946
947         vq = dev->virtqueue[vring_idx];
948         if (!vq)
949                 return -1;
950
951         if (vq_is_packed(dev)) {
952                 vring->desc_packed = vq->desc_packed;
953                 vring->driver_event = vq->driver_event;
954                 vring->device_event = vq->device_event;
955         } else {
956                 vring->desc = vq->desc;
957                 vring->avail = vq->avail;
958                 vring->used = vq->used;
959         }
960         vring->log_guest_addr  = vq->log_guest_addr;
961
962         vring->callfd  = vq->callfd;
963         vring->kickfd  = vq->kickfd;
964         vring->size    = vq->size;
965
966         return 0;
967 }
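/*
 * Usage sketch (illustrative only): fetch ring 0 and inspect it.  For
 * split rings the desc/avail/used pointers are filled in; for packed
 * rings desc_packed/driver_event/device_event are used instead:
 *
 *     struct rte_vhost_vring vring;
 *
 *     if (rte_vhost_get_vhost_vring(vid, 0, &vring) == 0) {
 *             vring.size, vring.kickfd and vring.callfd are now valid
 *     }
 */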
968
969 int
970 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
971                                   struct rte_vhost_ring_inflight *vring)
972 {
973         struct virtio_net *dev;
974         struct vhost_virtqueue *vq;
975
976         dev = get_device(vid);
977         if (unlikely(!dev))
978                 return -1;
979
980         if (vring_idx >= VHOST_MAX_VRING)
981                 return -1;
982
983         vq = dev->virtqueue[vring_idx];
984         if (unlikely(!vq))
985                 return -1;
986
987         if (vq_is_packed(dev)) {
988                 if (unlikely(!vq->inflight_packed))
989                         return -1;
990
991                 vring->inflight_packed = vq->inflight_packed;
992         } else {
993                 if (unlikely(!vq->inflight_split))
994                         return -1;
995
996                 vring->inflight_split = vq->inflight_split;
997         }
998
999         vring->resubmit_inflight = vq->resubmit_inflight;
1000
1001         return 0;
1002 }
1003
1004 int
1005 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1006                                   uint16_t idx)
1007 {
1008         struct vhost_virtqueue *vq;
1009         struct virtio_net *dev;
1010
1011         dev = get_device(vid);
1012         if (unlikely(!dev))
1013                 return -1;
1014
1015         if (unlikely(!(dev->protocol_features &
1016             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1017                 return 0;
1018
1019         if (unlikely(vq_is_packed(dev)))
1020                 return -1;
1021
1022         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1023                 return -1;
1024
1025         vq = dev->virtqueue[vring_idx];
1026         if (unlikely(!vq))
1027                 return -1;
1028
1029         if (unlikely(!vq->inflight_split))
1030                 return -1;
1031
1032         if (unlikely(idx >= vq->size))
1033                 return -1;
1034
1035         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1036         vq->inflight_split->desc[idx].inflight = 1;
1037         return 0;
1038 }
1039
1040 int
1041 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1042                                    uint16_t head, uint16_t last,
1043                                    uint16_t *inflight_entry)
1044 {
1045         struct rte_vhost_inflight_info_packed *inflight_info;
1046         struct virtio_net *dev;
1047         struct vhost_virtqueue *vq;
1048         struct vring_packed_desc *desc;
1049         uint16_t old_free_head, free_head;
1050
1051         dev = get_device(vid);
1052         if (unlikely(!dev))
1053                 return -1;
1054
1055         if (unlikely(!(dev->protocol_features &
1056             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1057                 return 0;
1058
1059         if (unlikely(!vq_is_packed(dev)))
1060                 return -1;
1061
1062         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1063                 return -1;
1064
1065         vq = dev->virtqueue[vring_idx];
1066         if (unlikely(!vq))
1067                 return -1;
1068
1069         inflight_info = vq->inflight_packed;
1070         if (unlikely(!inflight_info))
1071                 return -1;
1072
1073         if (unlikely(head >= vq->size))
1074                 return -1;
1075
1076         desc = vq->desc_packed;
1077         old_free_head = inflight_info->old_free_head;
1078         if (unlikely(old_free_head >= vq->size))
1079                 return -1;
1080
1081         free_head = old_free_head;
1082
1083         /* init header descriptor */
1084         inflight_info->desc[old_free_head].num = 0;
1085         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1086         inflight_info->desc[old_free_head].inflight = 1;
1087
1088         /* save the descriptor entries in the inflight entry */
1089         while (head != ((last + 1) % vq->size)) {
1090                 inflight_info->desc[old_free_head].num++;
1091                 inflight_info->desc[free_head].addr = desc[head].addr;
1092                 inflight_info->desc[free_head].len = desc[head].len;
1093                 inflight_info->desc[free_head].flags = desc[head].flags;
1094                 inflight_info->desc[free_head].id = desc[head].id;
1095
1096                 inflight_info->desc[old_free_head].last = free_head;
1097                 free_head = inflight_info->desc[free_head].next;
1098                 inflight_info->free_head = free_head;
1099                 head = (head + 1) % vq->size;
1100         }
1101
1102         inflight_info->old_free_head = free_head;
1103         *inflight_entry = old_free_head;
1104
1105         return 0;
1106 }
1107
1108 int
1109 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1110                                   uint16_t last_used_idx, uint16_t idx)
1111 {
1112         struct virtio_net *dev;
1113         struct vhost_virtqueue *vq;
1114
1115         dev = get_device(vid);
1116         if (unlikely(!dev))
1117                 return -1;
1118
1119         if (unlikely(!(dev->protocol_features &
1120             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1121                 return 0;
1122
1123         if (unlikely(vq_is_packed(dev)))
1124                 return -1;
1125
1126         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1127                 return -1;
1128
1129         vq = dev->virtqueue[vring_idx];
1130         if (unlikely(!vq))
1131                 return -1;
1132
1133         if (unlikely(!vq->inflight_split))
1134                 return -1;
1135
1136         if (unlikely(idx >= vq->size))
1137                 return -1;
1138
1139         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1140
1141         vq->inflight_split->desc[idx].inflight = 0;
1142
1143         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1144
1145         vq->inflight_split->used_idx = last_used_idx;
1146         return 0;
1147 }
1148
1149 int
1150 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1151                                    uint16_t head)
1152 {
1153         struct rte_vhost_inflight_info_packed *inflight_info;
1154         struct virtio_net *dev;
1155         struct vhost_virtqueue *vq;
1156
1157         dev = get_device(vid);
1158         if (unlikely(!dev))
1159                 return -1;
1160
1161         if (unlikely(!(dev->protocol_features &
1162             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1163                 return 0;
1164
1165         if (unlikely(!vq_is_packed(dev)))
1166                 return -1;
1167
1168         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1169                 return -1;
1170
1171         vq = dev->virtqueue[vring_idx];
1172         if (unlikely(!vq))
1173                 return -1;
1174
1175         inflight_info = vq->inflight_packed;
1176         if (unlikely(!inflight_info))
1177                 return -1;
1178
1179         if (unlikely(head >= vq->size))
1180                 return -1;
1181
1182         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1183
1184         inflight_info->desc[head].inflight = 0;
1185
1186         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1187
1188         inflight_info->old_free_head = inflight_info->free_head;
1189         inflight_info->old_used_idx = inflight_info->used_idx;
1190         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1191
1192         return 0;
1193 }
1194
1195 int
1196 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1197                                      uint16_t idx)
1198 {
1199         struct virtio_net *dev;
1200         struct vhost_virtqueue *vq;
1201
1202         dev = get_device(vid);
1203         if (unlikely(!dev))
1204                 return -1;
1205
1206         if (unlikely(!(dev->protocol_features &
1207             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1208                 return 0;
1209
1210         if (unlikely(vq_is_packed(dev)))
1211                 return -1;
1212
1213         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1214                 return -1;
1215
1216         vq = dev->virtqueue[vring_idx];
1217         if (unlikely(!vq))
1218                 return -1;
1219
1220         if (unlikely(!vq->inflight_split))
1221                 return -1;
1222
1223         if (unlikely(idx >= vq->size))
1224                 return -1;
1225
1226         vq->inflight_split->last_inflight_io = idx;
1227         return 0;
1228 }
1229
1230 int
1231 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1232                                       uint16_t head)
1233 {
1234         struct rte_vhost_inflight_info_packed *inflight_info;
1235         struct virtio_net *dev;
1236         struct vhost_virtqueue *vq;
1237         uint16_t last;
1238
1239         dev = get_device(vid);
1240         if (unlikely(!dev))
1241                 return -1;
1242
1243         if (unlikely(!(dev->protocol_features &
1244             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1245                 return 0;
1246
1247         if (unlikely(!vq_is_packed(dev)))
1248                 return -1;
1249
1250         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1251                 return -1;
1252
1253         vq = dev->virtqueue[vring_idx];
1254         if (unlikely(!vq))
1255                 return -1;
1256
1257         inflight_info = vq->inflight_packed;
1258         if (unlikely(!inflight_info))
1259                 return -1;
1260
1261         if (unlikely(head >= vq->size))
1262                 return -1;
1263
1264         last = inflight_info->desc[head].last;
1265         if (unlikely(last >= vq->size))
1266                 return -1;
1267
1268         inflight_info->desc[last].next = inflight_info->free_head;
1269         inflight_info->free_head = head;
1270         inflight_info->used_idx += inflight_info->desc[head].num;
1271         if (inflight_info->used_idx >= inflight_info->desc_num) {
1272                 inflight_info->used_idx -= inflight_info->desc_num;
1273                 inflight_info->used_wrap_counter =
1274                         !inflight_info->used_wrap_counter;
1275         }
1276
1277         return 0;
1278 }
1279
1280 int
1281 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1282 {
1283         struct virtio_net *dev;
1284         struct vhost_virtqueue *vq;
1285
1286         dev = get_device(vid);
1287         if (!dev)
1288                 return -1;
1289
1290         if (vring_idx >= VHOST_MAX_VRING)
1291                 return -1;
1292
1293         vq = dev->virtqueue[vring_idx];
1294         if (!vq)
1295                 return -1;
1296
1297         if (vq_is_packed(dev))
1298                 vhost_vring_call_packed(dev, vq);
1299         else
1300                 vhost_vring_call_split(dev, vq);
1301
1302         return 0;
1303 }
1304
1305 uint16_t
1306 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1307 {
1308         struct virtio_net *dev;
1309         struct vhost_virtqueue *vq;
1310         uint16_t ret = 0;
1311
1312         dev = get_device(vid);
1313         if (!dev)
1314                 return 0;
1315
1316         if (queue_id >= VHOST_MAX_VRING)
1317                 return 0;
1318
1319         vq = dev->virtqueue[queue_id];
1320         if (!vq)
1321                 return 0;
1322
1323         rte_spinlock_lock(&vq->access_lock);
1324
1325         if (unlikely(!vq->enabled || vq->avail == NULL))
1326                 goto out;
1327
1328         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1329
1330 out:
1331         rte_spinlock_unlock(&vq->access_lock);
1332         return ret;
1333 }
1334
1335 static inline int
1336 vhost_enable_notify_split(struct virtio_net *dev,
1337                 struct vhost_virtqueue *vq, int enable)
1338 {
1339         if (vq->used == NULL)
1340                 return -1;
1341
1342         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1343                 if (enable)
1344                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1345                 else
1346                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1347         } else {
1348                 if (enable)
1349                         vhost_avail_event(vq) = vq->last_avail_idx;
1350         }
1351         return 0;
1352 }
1353
1354 static inline int
1355 vhost_enable_notify_packed(struct virtio_net *dev,
1356                 struct vhost_virtqueue *vq, int enable)
1357 {
1358         uint16_t flags;
1359
1360         if (vq->device_event == NULL)
1361                 return -1;
1362
1363         if (!enable) {
1364                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1365                 return 0;
1366         }
1367
1368         flags = VRING_EVENT_F_ENABLE;
1369         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1370                 flags = VRING_EVENT_F_DESC;
1371                 vq->device_event->off_wrap = vq->last_avail_idx |
1372                         vq->avail_wrap_counter << 15;
1373         }
1374
1375         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1376
1377         vq->device_event->flags = flags;
1378         return 0;
1379 }
1380
1381 int
1382 vhost_enable_guest_notification(struct virtio_net *dev,
1383                 struct vhost_virtqueue *vq, int enable)
1384 {
1385         /*
1386          * If the virtqueue is not ready yet, the setting will be applied
1387          * when it becomes ready.
1388          */
1389         if (!vq->ready)
1390                 return 0;
1391
1392         if (vq_is_packed(dev))
1393                 return vhost_enable_notify_packed(dev, vq, enable);
1394         else
1395                 return vhost_enable_notify_split(dev, vq, enable);
1396 }
1397
1398 int
1399 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1400 {
1401         struct virtio_net *dev = get_device(vid);
1402         struct vhost_virtqueue *vq;
1403         int ret;
1404
1405         if (!dev)
1406                 return -1;
1407
1408         if (queue_id >= VHOST_MAX_VRING)
1409                 return -1;
1410
1411         vq = dev->virtqueue[queue_id];
1412         if (!vq)
1413                 return -1;
1414
1415         rte_spinlock_lock(&vq->access_lock);
1416
1417         vq->notif_enable = enable;
1418         ret = vhost_enable_guest_notification(dev, vq, enable);
1419
1420         rte_spinlock_unlock(&vq->access_lock);
1421
1422         return ret;
1423 }
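/*
 * Usage sketch (illustrative only): a backend that busy-polls the avail
 * ring usually keeps guest notifications disabled, and only re-enables
 * them right before it stops polling and waits on the kick eventfd:
 *
 *     rte_vhost_enable_guest_notification(vid, queue_id, 0);  start polling
 *     ...
 *     rte_vhost_enable_guest_notification(vid, queue_id, 1);  about to sleep
 */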
1424
1425 void
1426 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1427 {
1428         struct virtio_net *dev = get_device(vid);
1429
1430         if (dev == NULL)
1431                 return;
1432
1433         vhost_log_write(dev, addr, len);
1434 }
1435
1436 void
1437 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1438                          uint64_t offset, uint64_t len)
1439 {
1440         struct virtio_net *dev;
1441         struct vhost_virtqueue *vq;
1442
1443         dev = get_device(vid);
1444         if (dev == NULL)
1445                 return;
1446
1447         if (vring_idx >= VHOST_MAX_VRING)
1448                 return;
1449         vq = dev->virtqueue[vring_idx];
1450         if (!vq)
1451                 return;
1452
1453         vhost_log_used_vring(dev, vq, offset, len);
1454 }
1455
1456 uint32_t
1457 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1458 {
1459         struct virtio_net *dev;
1460         struct vhost_virtqueue *vq;
1461         uint32_t ret = 0;
1462
1463         dev = get_device(vid);
1464         if (dev == NULL)
1465                 return 0;
1466
1467         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1468                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1469                         dev->ifname, __func__, qid);
1470                 return 0;
1471         }
1472
1473         vq = dev->virtqueue[qid];
1474         if (vq == NULL)
1475                 return 0;
1476
1477         rte_spinlock_lock(&vq->access_lock);
1478
1479         if (unlikely(!vq->enabled || vq->avail == NULL))
1480                 goto out;
1481
1482         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1483
1484 out:
1485         rte_spinlock_unlock(&vq->access_lock);
1486         return ret;
1487 }
1488
1489 struct rte_vdpa_device *
1490 rte_vhost_get_vdpa_device(int vid)
1491 {
1492         struct virtio_net *dev = get_device(vid);
1493
1494         if (dev == NULL)
1495                 return NULL;
1496
1497         return dev->vdpa_dev;
1498 }
1499
1500 int
1501 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1502                 uint64_t *log_size)
1503 {
1504         struct virtio_net *dev = get_device(vid);
1505
1506         if (dev == NULL || log_base == NULL || log_size == NULL)
1507                 return -1;
1508
1509         *log_base = dev->log_base;
1510         *log_size = dev->log_size;
1511
1512         return 0;
1513 }
1514
1515 int
1516 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1517                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1518 {
1519         struct vhost_virtqueue *vq;
1520         struct virtio_net *dev = get_device(vid);
1521
1522         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1523                 return -1;
1524
1525         if (queue_id >= VHOST_MAX_VRING)
1526                 return -1;
1527
1528         vq = dev->virtqueue[queue_id];
1529         if (!vq)
1530                 return -1;
1531
1532         if (vq_is_packed(dev)) {
1533                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1534                                   vq->last_avail_idx;
1535                 *last_used_idx = (vq->used_wrap_counter << 15) |
1536                                  vq->last_used_idx;
1537         } else {
1538                 *last_avail_idx = vq->last_avail_idx;
1539                 *last_used_idx = vq->last_used_idx;
1540         }
1541
1542         return 0;
1543 }
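/*
 * For packed rings the returned values encode the wrap counter in bit 15,
 * mirroring the decoding done in rte_vhost_set_vring_base() below.
 * A minimal sketch:
 *
 *     uint16_t last_avail, last_used;
 *
 *     if (rte_vhost_get_vring_base(vid, queue_id,
 *                     &last_avail, &last_used) == 0) {
 *             uint16_t avail_idx = last_avail & 0x7fff;
 *             bool avail_wrap = !!(last_avail & (1 << 15));
 *     }
 */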
1544
1545 int
1546 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1547                 uint16_t last_avail_idx, uint16_t last_used_idx)
1548 {
1549         struct vhost_virtqueue *vq;
1550         struct virtio_net *dev = get_device(vid);
1551
1552         if (!dev)
1553                 return -1;
1554
1555         if (queue_id >= VHOST_MAX_VRING)
1556                 return -1;
1557
1558         vq = dev->virtqueue[queue_id];
1559         if (!vq)
1560                 return -1;
1561
1562         if (vq_is_packed(dev)) {
1563                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1564                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1565                 vq->last_used_idx = last_used_idx & 0x7fff;
1566                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1567         } else {
1568                 vq->last_avail_idx = last_avail_idx;
1569                 vq->last_used_idx = last_used_idx;
1570         }
1571
1572         return 0;
1573 }
1574
1575 int
1576 rte_vhost_get_vring_base_from_inflight(int vid,
1577                                        uint16_t queue_id,
1578                                        uint16_t *last_avail_idx,
1579                                        uint16_t *last_used_idx)
1580 {
1581         struct rte_vhost_inflight_info_packed *inflight_info;
1582         struct vhost_virtqueue *vq;
1583         struct virtio_net *dev = get_device(vid);
1584
1585         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1586                 return -1;
1587
1588         if (queue_id >= VHOST_MAX_VRING)
1589                 return -1;
1590
1591         vq = dev->virtqueue[queue_id];
1592         if (!vq)
1593                 return -1;
1594
1595         if (!vq_is_packed(dev))
1596                 return -1;
1597
1598         inflight_info = vq->inflight_packed;
1599         if (!inflight_info)
1600                 return -1;
1601
1602         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1603                           inflight_info->old_used_idx;
1604         *last_used_idx = *last_avail_idx;
1605
1606         return 0;
1607 }
1608
1609 int
1610 rte_vhost_extern_callback_register(int vid,
1611                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1612 {
1613         struct virtio_net *dev = get_device(vid);
1614
1615         if (dev == NULL || ops == NULL)
1616                 return -1;
1617
1618         dev->extern_ops = *ops;
1619         dev->extern_data = ctx;
1620         return 0;
1621 }
1622
1623 static __rte_always_inline int
1624 async_channel_register(int vid, uint16_t queue_id,
1625                 struct rte_vhost_async_channel_ops *ops)
1626 {
1627         struct virtio_net *dev = get_device(vid);
1628         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1629         struct vhost_async *async;
1630         int node = vq->numa_node;
1631
1632         if (unlikely(vq->async)) {
1633                 VHOST_LOG_CONFIG(ERR,
1634                                 "(%s) async register failed: already registered (qid: %d)\n",
1635                                 dev->ifname, queue_id);
1636                 return -1;
1637         }
1638
1639         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1640         if (!async) {
1641                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1642                                 dev->ifname, queue_id);
1643                 return -1;
1644         }
1645
1646         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1647                         RTE_CACHE_LINE_SIZE, node);
1648         if (!async->pkts_info) {
1649                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1650                                 dev->ifname, queue_id);
1651                 goto out_free_async;
1652         }
1653
1654         if (vq_is_packed(dev)) {
1655                 async->buffers_packed = rte_malloc_socket(NULL,
1656                                 vq->size * sizeof(struct vring_used_elem_packed),
1657                                 RTE_CACHE_LINE_SIZE, node);
1658                 if (!async->buffers_packed) {
1659                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1660                                         dev->ifname, queue_id);
1661                         goto out_free_inflight;
1662                 }
1663         } else {
1664                 async->descs_split = rte_malloc_socket(NULL,
1665                                 vq->size * sizeof(struct vring_used_elem),
1666                                 RTE_CACHE_LINE_SIZE, node);
1667                 if (!async->descs_split) {
1668                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1669                                         dev->ifname, queue_id);
1670                         goto out_free_inflight;
1671                 }
1672         }
1673
1674         async->ops.check_completed_copies = ops->check_completed_copies;
1675         async->ops.transfer_data = ops->transfer_data;
1676
1677         vq->async = async;
1678
1679         return 0;
1680 out_free_inflight:
1681         rte_free(async->pkts_info);
1682 out_free_async:
1683         rte_free(async);
1684
1685         return -1;
1686 }
1687
1688 int
1689 rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1690                 struct rte_vhost_async_config config,
1691                 struct rte_vhost_async_channel_ops *ops)
1692 {
1693         struct vhost_virtqueue *vq;
1694         struct virtio_net *dev = get_device(vid);
1695         int ret;
1696
1697         if (dev == NULL || ops == NULL)
1698                 return -1;
1699
1700         if (queue_id >= VHOST_MAX_VRING)
1701                 return -1;
1702
1703         vq = dev->virtqueue[queue_id];
1704
1705         if (unlikely(vq == NULL || !dev->async_copy))
1706                 return -1;
1707
1708         if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1709                 VHOST_LOG_CONFIG(ERR,
1710                         "(%s) async copy is not supported on non-inorder mode (qid: %d)\n",
1711                         dev->ifname, queue_id);
1712                 return -1;
1713         }
1714
1715         if (unlikely(ops->check_completed_copies == NULL ||
1716                 ops->transfer_data == NULL))
1717                 return -1;
1718
1719         rte_spinlock_lock(&vq->access_lock);
1720         ret = async_channel_register(vid, queue_id, ops);
1721         rte_spinlock_unlock(&vq->access_lock);
1722
1723         return ret;
1724 }
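/*
 * Registration sketch (illustrative only; my_transfer_data and
 * my_check_completed are hypothetical application-provided callbacks
 * driving the copy engine):
 *
 *     static struct rte_vhost_async_channel_ops my_ops = {
 *             .transfer_data = my_transfer_data,
 *             .check_completed_copies = my_check_completed,
 *     };
 *     struct rte_vhost_async_config cfg = {
 *             .features = RTE_VHOST_ASYNC_INORDER,
 *     };
 *
 *     if (rte_vhost_async_channel_register(vid, queue_id, cfg, &my_ops) != 0)
 *             handle the error (async copy not enabled, queue busy, ...)
 */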
1725
1726 int
1727 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
1728                 struct rte_vhost_async_config config,
1729                 struct rte_vhost_async_channel_ops *ops)
1730 {
1731         struct vhost_virtqueue *vq;
1732         struct virtio_net *dev = get_device(vid);
1733
1734         if (dev == NULL || ops == NULL)
1735                 return -1;
1736
1737         if (queue_id >= VHOST_MAX_VRING)
1738                 return -1;
1739
1740         vq = dev->virtqueue[queue_id];
1741
1742         if (unlikely(vq == NULL || !dev->async_copy))
1743                 return -1;
1744
1745         if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1746                 VHOST_LOG_CONFIG(ERR,
1747                         "(%s) async copy is not supported on non-inorder mode (qid: %d)\n",
1748                         dev->ifname, queue_id);
1749                 return -1;
1750         }
1751
1752         if (unlikely(ops->check_completed_copies == NULL ||
1753                 ops->transfer_data == NULL))
1754                 return -1;
1755
1756         return async_channel_register(vid, queue_id, ops);
1757 }
1758
1759 int
1760 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1761 {
1762         struct vhost_virtqueue *vq;
1763         struct virtio_net *dev = get_device(vid);
1764         int ret = -1;
1765
1766         if (dev == NULL)
1767                 return ret;
1768
1769         if (queue_id >= VHOST_MAX_VRING)
1770                 return ret;
1771
1772         vq = dev->virtqueue[queue_id];
1773
1774         if (vq == NULL)
1775                 return ret;
1776
1777         ret = 0;
1778
1779         if (!vq->async)
1780                 return ret;
1781
1782         if (!rte_spinlock_trylock(&vq->access_lock)) {
1783                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1784                                 dev->ifname);
1785                 return -1;
1786         }
1787
1788         if (vq->async->pkts_inflight_n) {
1789                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1790                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1791                         dev->ifname);
1792                 ret = -1;
1793                 goto out;
1794         }
1795
1796         vhost_free_async_mem(vq);
1797 out:
1798         rte_spinlock_unlock(&vq->access_lock);
1799
1800         return ret;
1801 }
1802
1803 int
1804 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1805 {
1806         struct vhost_virtqueue *vq;
1807         struct virtio_net *dev = get_device(vid);
1808
1809         if (dev == NULL)
1810                 return -1;
1811
1812         if (queue_id >= VHOST_MAX_VRING)
1813                 return -1;
1814
1815         vq = dev->virtqueue[queue_id];
1816
1817         if (vq == NULL)
1818                 return -1;
1819
1820         if (!vq->async)
1821                 return 0;
1822
1823         if (vq->async->pkts_inflight_n) {
1824                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1825                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1826                         dev->ifname);
1827                 return -1;
1828         }
1829
1830         vhost_free_async_mem(vq);
1831
1832         return 0;
1833 }
1834
1835 int
1836 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1837 {
1838         struct vhost_virtqueue *vq;
1839         struct virtio_net *dev = get_device(vid);
1840         int ret = -1;
1841
1842         if (dev == NULL)
1843                 return ret;
1844
1845         if (queue_id >= VHOST_MAX_VRING)
1846                 return ret;
1847
1848         vq = dev->virtqueue[queue_id];
1849
1850         if (vq == NULL)
1851                 return ret;
1852
1853         if (!vq->async)
1854                 return ret;
1855
1856         if (!rte_spinlock_trylock(&vq->access_lock)) {
1857                 VHOST_LOG_CONFIG(DEBUG,
1858                         "(%s) failed to check in-flight packets. virtqueue busy.\n",
1859                         dev->ifname);
1860                 return ret;
1861         }
1862
1863         ret = vq->async->pkts_inflight_n;
1864         rte_spinlock_unlock(&vq->access_lock);
1865
1866         return ret;
1867 }
1868
1869 int
1870 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1871                 struct rte_vhost_power_monitor_cond *pmc)
1872 {
1873         struct virtio_net *dev = get_device(vid);
1874         struct vhost_virtqueue *vq;
1875
1876         if (dev == NULL)
1877                 return -1;
1878         if (queue_id >= VHOST_MAX_VRING)
1879                 return -1;
1880
1881         vq = dev->virtqueue[queue_id];
1882         if (vq == NULL)
1883                 return -1;
1884
1885         if (vq_is_packed(dev)) {
1886                 struct vring_packed_desc *desc;
1887                 desc = vq->desc_packed;
1888                 pmc->addr = &desc[vq->last_avail_idx].flags;
1889                 if (vq->avail_wrap_counter)
1890                         pmc->val = VRING_DESC_F_AVAIL;
1891                 else
1892                         pmc->val = VRING_DESC_F_USED;
1893                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1894                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1895                 pmc->match = 1;
1896         } else {
1897                 pmc->addr = &vq->avail->idx;
1898                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1899                 pmc->mask = vq->size - 1;
1900                 pmc->size = sizeof(vq->avail->idx);
1901                 pmc->match = 0;
1902         }
1903
1904         return 0;
1905 }
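/*
 * Usage sketch (illustrative only): a power-management helper fetches the
 * condition and then waits until the value at pmc.addr, masked with
 * pmc.mask, matches pmc.val (pmc.match == 1) or differs from it
 * (pmc.match == 0):
 *
 *     struct rte_vhost_power_monitor_cond pmc;
 *
 *     if (rte_vhost_get_monitor_addr(vid, queue_id, &pmc) == 0) {
 *             arm the monitor on pmc.addr with width pmc.size
 *     }
 */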
1906
1907 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1908 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);