lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
24 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
25 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
27 /* Called with iotlb_lock read-locked */
28 uint64_t
29 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
30                     uint64_t iova, uint64_t *size, uint8_t perm)
31 {
32         uint64_t vva, tmp_size;
33
34         if (unlikely(!*size))
35                 return 0;
36
37         tmp_size = *size;
38
39         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
40         if (tmp_size == *size)
41                 return vva;
42
43         iova += tmp_size;
44
45         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
46                 /*
47                  * iotlb_lock is read-locked for a full burst,
48                  * but it only protects the iotlb cache.
49                  * In case of IOTLB miss, we might block on the socket,
50                  * which could cause a deadlock with QEMU if an IOTLB update
51                  * is being handled. We can safely unlock here to avoid it.
52                  */
53                 vhost_user_iotlb_rd_unlock(vq);
54
55                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
56                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
57                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
58                                 dev->ifname, iova);
59                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
60                 }
61
62                 vhost_user_iotlb_rd_lock(vq);
63         }
64
65         return 0;
66 }
67
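/*
 * A minimal usage sketch for the IOVA translation above (illustrative
 * only, not taken from this file): it assumes the caller already holds
 * the IOTLB read lock via vhost_user_iotlb_rd_lock(), as the data path
 * does for a full burst, and desc_iova/desc_len are placeholder names
 * for the buffer being translated.
 *
 *	uint64_t len = desc_len;
 *	uint64_t vva = vhost_iova_to_vva(dev, vq, desc_iova, &len,
 *					 VHOST_ACCESS_RO);
 *	if (!vva || len != desc_len) {
 *		// No full contiguous mapping: in the IOMMU case an IOTLB
 *		// miss request has been issued, retry once the update
 *		// from the frontend is received.
 *	}
 */
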
68 #define VHOST_LOG_PAGE  4096
69
70 /*
71  * Atomically set a bit in memory.
72  */
73 static __rte_always_inline void
74 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
75 {
76 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
77         /*
78          * __sync_ built-ins are deprecated, but __atomic_ ones
79          * are sub-optimized in older GCC versions.
80          */
81         __sync_fetch_and_or_1(addr, (1U << nr));
82 #else
83         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
84 #endif
85 }
86
87 static __rte_always_inline void
88 vhost_log_page(uint8_t *log_base, uint64_t page)
89 {
90         vhost_set_bit(page % 8, &log_base[page / 8]);
91 }
92
93 void
94 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
95 {
96         uint64_t page;
97
98         if (unlikely(!dev->log_base || !len))
99                 return;
100
101         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
102                 return;
103
104         /* To make sure guest memory updates are committed before logging */
105         rte_atomic_thread_fence(__ATOMIC_RELEASE);
106
107         page = addr / VHOST_LOG_PAGE;
108         while (page * VHOST_LOG_PAGE < addr + len) {
109                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
110                 page += 1;
111         }
112 }
113
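/*
 * A worked example of the dirty-log layout used above (illustrative
 * only): with VHOST_LOG_PAGE = 4096, logging addr = 0x3000, len = 0x2000
 * touches guest pages 3 and 4, i.e. bits 3 and 4 of log_base[0], since
 * each byte of the log covers 8 pages (page / 8 selects the byte,
 * page % 8 the bit).
 */
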
114 void
115 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
116                              uint64_t iova, uint64_t len)
117 {
118         uint64_t hva, gpa, map_len;
119         map_len = len;
120
121         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
122         if (map_len != len) {
123                 VHOST_LOG_DATA(ERR,
124                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
125                         dev->ifname, iova);
126                 return;
127         }
128
129         gpa = hva_to_gpa(dev, hva, len);
130         if (gpa)
131                 __vhost_log_write(dev, gpa, len);
132 }
133
134 void
135 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
136 {
137         unsigned long *log_base;
138         int i;
139
140         if (unlikely(!dev->log_base))
141                 return;
142
143         /* No cache, nothing to sync */
144         if (unlikely(!vq->log_cache))
145                 return;
146
147         rte_atomic_thread_fence(__ATOMIC_RELEASE);
148
149         log_base = (unsigned long *)(uintptr_t)dev->log_base;
150
151         for (i = 0; i < vq->log_cache_nb_elem; i++) {
152                 struct log_cache_entry *elem = vq->log_cache + i;
153
154 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
155                 /*
156                  * '__sync' builtins are deprecated, but '__atomic' ones
157                  * are sub-optimized in older GCC versions.
158                  */
159                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
160 #else
161                 __atomic_fetch_or(log_base + elem->offset, elem->val,
162                                 __ATOMIC_RELAXED);
163 #endif
164         }
165
166         rte_atomic_thread_fence(__ATOMIC_RELEASE);
167
168         vq->log_cache_nb_elem = 0;
169 }
170
171 static __rte_always_inline void
172 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
173                         uint64_t page)
174 {
175         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
176         uint32_t offset = page / (sizeof(unsigned long) << 3);
177         int i;
178
179         if (unlikely(!vq->log_cache)) {
180                 /* No logging cache allocated, write dirty log map directly */
181                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
182                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
183
184                 return;
185         }
186
187         for (i = 0; i < vq->log_cache_nb_elem; i++) {
188                 struct log_cache_entry *elem = vq->log_cache + i;
189
190                 if (elem->offset == offset) {
191                         elem->val |= (1UL << bit_nr);
192                         return;
193                 }
194         }
195
196         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
197                 /*
198                  * No more room for a new log cache entry,
199                  * so write the dirty log map directly.
200                  */
201                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
202                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
203
204                 return;
205         }
206
207         vq->log_cache[i].offset = offset;
208         vq->log_cache[i].val = (1UL << bit_nr);
209         vq->log_cache_nb_elem++;
210 }
211
212 void
213 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
214                         uint64_t addr, uint64_t len)
215 {
216         uint64_t page;
217
218         if (unlikely(!dev->log_base || !len))
219                 return;
220
221         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
222                 return;
223
224         page = addr / VHOST_LOG_PAGE;
225         while (page * VHOST_LOG_PAGE < addr + len) {
226                 vhost_log_cache_page(dev, vq, page);
227                 page += 1;
228         }
229 }
230
231 void
232 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
233                              uint64_t iova, uint64_t len)
234 {
235         uint64_t hva, gpa, map_len;
236         map_len = len;
237
238         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
239         if (map_len != len) {
240                 VHOST_LOG_DATA(ERR,
241                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
242                         dev->ifname, iova);
243                 return;
244         }
245
246         gpa = hva_to_gpa(dev, hva, len);
247         if (gpa)
248                 __vhost_log_cache_write(dev, vq, gpa, len);
249 }
250
251 void *
252 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
253                 uint64_t desc_addr, uint64_t desc_len)
254 {
255         void *idesc;
256         uint64_t src, dst;
257         uint64_t len, remain = desc_len;
258
259         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
260         if (unlikely(!idesc))
261                 return NULL;
262
263         dst = (uint64_t)(uintptr_t)idesc;
264
265         while (remain) {
266                 len = remain;
267                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
268                                 VHOST_ACCESS_RO);
269                 if (unlikely(!src || !len)) {
270                         rte_free(idesc);
271                         return NULL;
272                 }
273
274                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
275
276                 remain -= len;
277                 dst += len;
278                 desc_addr += len;
279         }
280
281         return idesc;
282 }
283
284 void
285 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
286 {
287         if ((vq->callfd >= 0) && (destroy != 0))
288                 close(vq->callfd);
289         if (vq->kickfd >= 0)
290                 close(vq->kickfd);
291 }
292
293 void
294 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
295 {
296         if (!(dev->protocol_features &
297             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
298                 return;
299
300         if (vq_is_packed(dev)) {
301                 if (vq->inflight_packed)
302                         vq->inflight_packed = NULL;
303         } else {
304                 if (vq->inflight_split)
305                         vq->inflight_split = NULL;
306         }
307
308         if (vq->resubmit_inflight) {
309                 if (vq->resubmit_inflight->resubmit_list) {
310                         rte_free(vq->resubmit_inflight->resubmit_list);
311                         vq->resubmit_inflight->resubmit_list = NULL;
312                 }
313                 rte_free(vq->resubmit_inflight);
314                 vq->resubmit_inflight = NULL;
315         }
316 }
317
318 /*
319  * Unmap any memory, close any file descriptors and
320  * free any memory owned by a device.
321  */
322 void
323 cleanup_device(struct virtio_net *dev, int destroy)
324 {
325         uint32_t i;
326
327         vhost_backend_cleanup(dev);
328
329         for (i = 0; i < dev->nr_vring; i++) {
330                 cleanup_vq(dev->virtqueue[i], destroy);
331                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
332         }
333 }
334
335 static void
336 vhost_free_async_mem(struct vhost_virtqueue *vq)
337 {
338         if (!vq->async)
339                 return;
340
341         rte_free(vq->async->pkts_info);
342         rte_free(vq->async->pkts_cmpl_flag);
343
344         rte_free(vq->async->buffers_packed);
345         vq->async->buffers_packed = NULL;
346         rte_free(vq->async->descs_split);
347         vq->async->descs_split = NULL;
348
349         rte_free(vq->async);
350         vq->async = NULL;
351 }
352
353 void
354 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
355 {
356         if (vq_is_packed(dev))
357                 rte_free(vq->shadow_used_packed);
358         else
359                 rte_free(vq->shadow_used_split);
360
361         vhost_free_async_mem(vq);
362         rte_free(vq->batch_copy_elems);
363         rte_mempool_free(vq->iotlb_pool);
364         rte_free(vq->log_cache);
365         rte_free(vq);
366 }
367
368 /*
369  * Release virtqueues and device memory.
370  */
371 static void
372 free_device(struct virtio_net *dev)
373 {
374         uint32_t i;
375
376         for (i = 0; i < dev->nr_vring; i++)
377                 free_vq(dev, dev->virtqueue[i]);
378
379         rte_free(dev);
380 }
381
382 static __rte_always_inline int
383 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
384 {
385         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
386                 return 0;
387
388         vq->log_guest_addr = translate_log_addr(dev, vq,
389                                                 vq->ring_addrs.log_guest_addr);
390         if (vq->log_guest_addr == 0)
391                 return -1;
392
393         return 0;
394 }
395
396 /*
397  * Converts a vring log address to a GPA.
398  * If IOMMU is enabled, the log address is an IOVA.
399  * If IOMMU is not enabled, the log address is already a GPA.
400  *
401  * Caller should have iotlb_lock read-locked
402  */
403 uint64_t
404 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
405                 uint64_t log_addr)
406 {
407         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
408                 const uint64_t exp_size = sizeof(uint64_t);
409                 uint64_t hva, gpa;
410                 uint64_t size = exp_size;
411
412                 hva = vhost_iova_to_vva(dev, vq, log_addr,
413                                         &size, VHOST_ACCESS_RW);
414
415                 if (size != exp_size)
416                         return 0;
417
418                 gpa = hva_to_gpa(dev, hva, exp_size);
419                 if (!gpa) {
420                         VHOST_LOG_DATA(ERR,
421                                 "(%s) failed to find GPA for log_addr: 0x%"
422                                 PRIx64 " hva: 0x%" PRIx64 "\n",
423                                 dev->ifname, log_addr, hva);
424                         return 0;
425                 }
426                 return gpa;
427
428         } else
429                 return log_addr;
430 }
431
432 /* Caller should have iotlb_lock read-locked */
433 static int
434 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
435 {
436         uint64_t req_size, size;
437
438         req_size = sizeof(struct vring_desc) * vq->size;
439         size = req_size;
440         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
441                                                 vq->ring_addrs.desc_user_addr,
442                                                 &size, VHOST_ACCESS_RW);
443         if (!vq->desc || size != req_size)
444                 return -1;
445
446         req_size = sizeof(struct vring_avail);
447         req_size += sizeof(uint16_t) * vq->size;
448         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
449                 req_size += sizeof(uint16_t);
450         size = req_size;
451         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
452                                                 vq->ring_addrs.avail_user_addr,
453                                                 &size, VHOST_ACCESS_RW);
454         if (!vq->avail || size != req_size)
455                 return -1;
456
457         req_size = sizeof(struct vring_used);
458         req_size += sizeof(struct vring_used_elem) * vq->size;
459         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
460                 req_size += sizeof(uint16_t);
461         size = req_size;
462         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
463                                                 vq->ring_addrs.used_user_addr,
464                                                 &size, VHOST_ACCESS_RW);
465         if (!vq->used || size != req_size)
466                 return -1;
467
468         return 0;
469 }
470
471 /* Caller should have iotlb_lock read-locked */
472 static int
473 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
474 {
475         uint64_t req_size, size;
476
477         req_size = sizeof(struct vring_packed_desc) * vq->size;
478         size = req_size;
479         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
480                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
481                                 &size, VHOST_ACCESS_RW);
482         if (!vq->desc_packed || size != req_size)
483                 return -1;
484
485         req_size = sizeof(struct vring_packed_desc_event);
486         size = req_size;
487         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
488                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
489                                 &size, VHOST_ACCESS_RW);
490         if (!vq->driver_event || size != req_size)
491                 return -1;
492
493         req_size = sizeof(struct vring_packed_desc_event);
494         size = req_size;
495         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
496                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
497                                 &size, VHOST_ACCESS_RW);
498         if (!vq->device_event || size != req_size)
499                 return -1;
500
501         return 0;
502 }
503
504 int
505 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
506 {
507
508         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
509                 return -1;
510
511         if (vq_is_packed(dev)) {
512                 if (vring_translate_packed(dev, vq) < 0)
513                         return -1;
514         } else {
515                 if (vring_translate_split(dev, vq) < 0)
516                         return -1;
517         }
518
519         if (log_translate(dev, vq) < 0)
520                 return -1;
521
522         vq->access_ok = true;
523
524         return 0;
525 }
526
527 void
528 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
529 {
530         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
531                 vhost_user_iotlb_wr_lock(vq);
532
533         vq->access_ok = false;
534         vq->desc = NULL;
535         vq->avail = NULL;
536         vq->used = NULL;
537         vq->log_guest_addr = 0;
538
539         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
540                 vhost_user_iotlb_wr_unlock(vq);
541 }
542
543 static void
544 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
545 {
546         struct vhost_virtqueue *vq;
547         int numa_node = SOCKET_ID_ANY;
548
549         if (vring_idx >= VHOST_MAX_VRING) {
550                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
551                                 dev->ifname, vring_idx);
552                 return;
553         }
554
555         vq = dev->virtqueue[vring_idx];
556         if (!vq) {
557                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
558                                 dev->ifname, vring_idx);
559                 return;
560         }
561
562         memset(vq, 0, sizeof(struct vhost_virtqueue));
563
564         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
565         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
566         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
567
568 #ifdef RTE_LIBRTE_VHOST_NUMA
569         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
570                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
571                         dev->ifname, rte_strerror(errno));
572                 numa_node = SOCKET_ID_ANY;
573         }
574 #endif
575         vq->numa_node = numa_node;
576
577         vhost_user_iotlb_init(dev, vring_idx);
578 }
579
580 static void
581 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
582 {
583         struct vhost_virtqueue *vq;
584         int callfd;
585
586         if (vring_idx >= VHOST_MAX_VRING) {
587                 VHOST_LOG_CONFIG(ERR,
588                                 "(%s) failed to reset vring, out of bound (%d)\n",
589                                 dev->ifname, vring_idx);
590                 return;
591         }
592
593         vq = dev->virtqueue[vring_idx];
594         if (!vq) {
595                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
596                                 dev->ifname, vring_idx);
597                 return;
598         }
599
600         callfd = vq->callfd;
601         init_vring_queue(dev, vring_idx);
602         vq->callfd = callfd;
603 }
604
605 int
606 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
607 {
608         struct vhost_virtqueue *vq;
609         uint32_t i;
610
611         /* Also allocate holes, if any, up to requested vring index. */
612         for (i = 0; i <= vring_idx; i++) {
613                 if (dev->virtqueue[i])
614                         continue;
615
616                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
617                 if (vq == NULL) {
618                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
619                                         dev->ifname, i);
620                         return -1;
621                 }
622
623                 dev->virtqueue[i] = vq;
624                 init_vring_queue(dev, i);
625                 rte_spinlock_init(&vq->access_lock);
626                 vq->avail_wrap_counter = 1;
627                 vq->used_wrap_counter = 1;
628                 vq->signalled_used_valid = false;
629         }
630
631         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
632
633         return 0;
634 }
635
636 /*
637  * Reset some variables in the device structure, while keeping a few
638  * others untouched, such as vid, ifname and nr_vring: they
639  * should remain the same unless the device is removed.
640  */
641 void
642 reset_device(struct virtio_net *dev)
643 {
644         uint32_t i;
645
646         dev->features = 0;
647         dev->protocol_features = 0;
648         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
649
650         for (i = 0; i < dev->nr_vring; i++)
651                 reset_vring_queue(dev, i);
652 }
653
654 /*
655  * Invoked when a new vhost-user connection is established (when
656  * a new virtio device is being attached).
657  */
658 int
659 vhost_new_device(void)
660 {
661         struct virtio_net *dev;
662         int i;
663
664         pthread_mutex_lock(&vhost_dev_lock);
665         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
666                 if (vhost_devices[i] == NULL)
667                         break;
668         }
669
670         if (i == RTE_MAX_VHOST_DEVICE) {
671                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
672                 pthread_mutex_unlock(&vhost_dev_lock);
673                 return -1;
674         }
675
676         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
677         if (dev == NULL) {
678                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
679                 pthread_mutex_unlock(&vhost_dev_lock);
680                 return -1;
681         }
682
683         vhost_devices[i] = dev;
684         pthread_mutex_unlock(&vhost_dev_lock);
685
686         dev->vid = i;
687         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
688         dev->slave_req_fd = -1;
689         dev->postcopy_ufd = -1;
690         rte_spinlock_init(&dev->slave_req_lock);
691
692         return i;
693 }
694
695 void
696 vhost_destroy_device_notify(struct virtio_net *dev)
697 {
698         struct rte_vdpa_device *vdpa_dev;
699
700         if (dev->flags & VIRTIO_DEV_RUNNING) {
701                 vdpa_dev = dev->vdpa_dev;
702                 if (vdpa_dev)
703                         vdpa_dev->ops->dev_close(dev->vid);
704                 dev->flags &= ~VIRTIO_DEV_RUNNING;
705                 dev->notify_ops->destroy_device(dev->vid);
706         }
707 }
708
709 /*
710  * Invoked when the vhost-user connection is broken (when
711  * the virtio device is being detached).
712  */
713 void
714 vhost_destroy_device(int vid)
715 {
716         struct virtio_net *dev = get_device(vid);
717
718         if (dev == NULL)
719                 return;
720
721         vhost_destroy_device_notify(dev);
722
723         cleanup_device(dev, 1);
724         free_device(dev);
725
726         vhost_devices[vid] = NULL;
727 }
728
729 void
730 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
731 {
732         struct virtio_net *dev = get_device(vid);
733
734         if (dev == NULL)
735                 return;
736
737         dev->vdpa_dev = vdpa_dev;
738 }
739
740 void
741 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
742 {
743         struct virtio_net *dev;
744         unsigned int len;
745
746         dev = get_device(vid);
747         if (dev == NULL)
748                 return;
749
750         len = if_len > sizeof(dev->ifname) ?
751                 sizeof(dev->ifname) : if_len;
752
753         strncpy(dev->ifname, if_name, len);
754         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
755 }
756
757 void
758 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
759 {
760         struct virtio_net *dev = get_device(vid);
761
762         if (dev == NULL)
763                 return;
764
765         if (enable)
766                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
767         else
768                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
769         if (!compliant_ol_flags)
770                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
771         else
772                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
773 }
774
775 void
776 vhost_enable_extbuf(int vid)
777 {
778         struct virtio_net *dev = get_device(vid);
779
780         if (dev == NULL)
781                 return;
782
783         dev->extbuf = 1;
784 }
785
786 void
787 vhost_enable_linearbuf(int vid)
788 {
789         struct virtio_net *dev = get_device(vid);
790
791         if (dev == NULL)
792                 return;
793
794         dev->linearbuf = 1;
795 }
796
797 int
798 rte_vhost_get_mtu(int vid, uint16_t *mtu)
799 {
800         struct virtio_net *dev = get_device(vid);
801
802         if (dev == NULL || mtu == NULL)
803                 return -ENODEV;
804
805         if (!(dev->flags & VIRTIO_DEV_READY))
806                 return -EAGAIN;
807
808         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
809                 return -ENOTSUP;
810
811         *mtu = dev->mtu;
812
813         return 0;
814 }
815
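/*
 * A hedged usage sketch for rte_vhost_get_mtu() (handle_mtu() is a
 * placeholder, not a real API): -EAGAIN only means feature negotiation
 * has not finished yet, so it is usually worth retrying later rather
 * than treating it as fatal.
 *
 *	uint16_t mtu;
 *	int ret = rte_vhost_get_mtu(vid, &mtu);
 *
 *	if (ret == 0)
 *		handle_mtu(mtu);
 *	else if (ret == -EAGAIN)
 *		;	// device not ready yet, try again later
 *	else
 *		;	// -ENODEV or -ENOTSUP: no MTU to apply
 */
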
816 int
817 rte_vhost_get_numa_node(int vid)
818 {
819 #ifdef RTE_LIBRTE_VHOST_NUMA
820         struct virtio_net *dev = get_device(vid);
821         int numa_node;
822         int ret;
823
824         if (dev == NULL || numa_available() != 0)
825                 return -1;
826
827         ret = get_mempolicy(&numa_node, NULL, 0, dev,
828                             MPOL_F_NODE | MPOL_F_ADDR);
829         if (ret < 0) {
830                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
831                         dev->ifname, rte_strerror(errno));
832                 return -1;
833         }
834
835         return numa_node;
836 #else
837         RTE_SET_USED(vid);
838         return -1;
839 #endif
840 }
841
842 uint32_t
843 rte_vhost_get_queue_num(int vid)
844 {
845         struct virtio_net *dev = get_device(vid);
846
847         if (dev == NULL)
848                 return 0;
849
850         return dev->nr_vring / 2;
851 }
852
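/*
 * For example, a device with 6 vrings (3 RX + 3 TX) reports 3 queue
 * pairs here, while rte_vhost_get_vring_num() below reports 6.
 */
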
853 uint16_t
854 rte_vhost_get_vring_num(int vid)
855 {
856         struct virtio_net *dev = get_device(vid);
857
858         if (dev == NULL)
859                 return 0;
860
861         return dev->nr_vring;
862 }
863
864 int
865 rte_vhost_get_ifname(int vid, char *buf, size_t len)
866 {
867         struct virtio_net *dev = get_device(vid);
868
869         if (dev == NULL || buf == NULL)
870                 return -1;
871
872         len = RTE_MIN(len, sizeof(dev->ifname));
873
874         strncpy(buf, dev->ifname, len);
875         buf[len - 1] = '\0';
876
877         return 0;
878 }
879
880 int
881 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
882 {
883         struct virtio_net *dev;
884
885         dev = get_device(vid);
886         if (dev == NULL || features == NULL)
887                 return -1;
888
889         *features = dev->features;
890         return 0;
891 }
892
893 int
894 rte_vhost_get_negotiated_protocol_features(int vid,
895                                            uint64_t *protocol_features)
896 {
897         struct virtio_net *dev;
898
899         dev = get_device(vid);
900         if (dev == NULL || protocol_features == NULL)
901                 return -1;
902
903         *protocol_features = dev->protocol_features;
904         return 0;
905 }
906
907 int
908 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
909 {
910         struct virtio_net *dev;
911         struct rte_vhost_memory *m;
912         size_t size;
913
914         dev = get_device(vid);
915         if (dev == NULL || mem == NULL)
916                 return -1;
917
918         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
919         m = malloc(sizeof(struct rte_vhost_memory) + size);
920         if (!m)
921                 return -1;
922
923         m->nregions = dev->mem->nregions;
924         memcpy(m->regions, dev->mem->regions, size);
925         *mem = m;
926
927         return 0;
928 }
929
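/*
 * Usage sketch (inspect_region() is a placeholder): the table returned
 * above is allocated with plain malloc(), so the caller owns it and is
 * expected to release it with free().
 *
 *	struct rte_vhost_memory *mem = NULL;
 *	uint32_t i;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		for (i = 0; i < mem->nregions; i++)
 *			inspect_region(&mem->regions[i]);
 *		free(mem);
 *	}
 */
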
930 int
931 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
932                           struct rte_vhost_vring *vring)
933 {
934         struct virtio_net *dev;
935         struct vhost_virtqueue *vq;
936
937         dev = get_device(vid);
938         if (dev == NULL || vring == NULL)
939                 return -1;
940
941         if (vring_idx >= VHOST_MAX_VRING)
942                 return -1;
943
944         vq = dev->virtqueue[vring_idx];
945         if (!vq)
946                 return -1;
947
948         if (vq_is_packed(dev)) {
949                 vring->desc_packed = vq->desc_packed;
950                 vring->driver_event = vq->driver_event;
951                 vring->device_event = vq->device_event;
952         } else {
953                 vring->desc = vq->desc;
954                 vring->avail = vq->avail;
955                 vring->used = vq->used;
956         }
957         vring->log_guest_addr  = vq->log_guest_addr;
958
959         vring->callfd  = vq->callfd;
960         vring->kickfd  = vq->kickfd;
961         vring->size    = vq->size;
962
963         return 0;
964 }
965
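/*
 * A small usage sketch, assuming the caller only needs the ring size
 * and eventfds exposed above (printf() is used just for illustration):
 *
 *	struct rte_vhost_vring vring;
 *
 *	if (rte_vhost_get_vhost_vring(vid, vring_idx, &vring) == 0)
 *		printf("vring %u: size %u kickfd %d callfd %d\n",
 *		       vring_idx, vring.size, vring.kickfd, vring.callfd);
 */
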
966 int
967 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
968                                   struct rte_vhost_ring_inflight *vring)
969 {
970         struct virtio_net *dev;
971         struct vhost_virtqueue *vq;
972
973         dev = get_device(vid);
974         if (unlikely(!dev))
975                 return -1;
976
977         if (vring_idx >= VHOST_MAX_VRING)
978                 return -1;
979
980         vq = dev->virtqueue[vring_idx];
981         if (unlikely(!vq))
982                 return -1;
983
984         if (vq_is_packed(dev)) {
985                 if (unlikely(!vq->inflight_packed))
986                         return -1;
987
988                 vring->inflight_packed = vq->inflight_packed;
989         } else {
990                 if (unlikely(!vq->inflight_split))
991                         return -1;
992
993                 vring->inflight_split = vq->inflight_split;
994         }
995
996         vring->resubmit_inflight = vq->resubmit_inflight;
997
998         return 0;
999 }
1000
1001 int
1002 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1003                                   uint16_t idx)
1004 {
1005         struct vhost_virtqueue *vq;
1006         struct virtio_net *dev;
1007
1008         dev = get_device(vid);
1009         if (unlikely(!dev))
1010                 return -1;
1011
1012         if (unlikely(!(dev->protocol_features &
1013             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1014                 return 0;
1015
1016         if (unlikely(vq_is_packed(dev)))
1017                 return -1;
1018
1019         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1020                 return -1;
1021
1022         vq = dev->virtqueue[vring_idx];
1023         if (unlikely(!vq))
1024                 return -1;
1025
1026         if (unlikely(!vq->inflight_split))
1027                 return -1;
1028
1029         if (unlikely(idx >= vq->size))
1030                 return -1;
1031
1032         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1033         vq->inflight_split->desc[idx].inflight = 1;
1034         return 0;
1035 }
1036
1037 int
1038 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1039                                    uint16_t head, uint16_t last,
1040                                    uint16_t *inflight_entry)
1041 {
1042         struct rte_vhost_inflight_info_packed *inflight_info;
1043         struct virtio_net *dev;
1044         struct vhost_virtqueue *vq;
1045         struct vring_packed_desc *desc;
1046         uint16_t old_free_head, free_head;
1047
1048         dev = get_device(vid);
1049         if (unlikely(!dev))
1050                 return -1;
1051
1052         if (unlikely(!(dev->protocol_features &
1053             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1054                 return 0;
1055
1056         if (unlikely(!vq_is_packed(dev)))
1057                 return -1;
1058
1059         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1060                 return -1;
1061
1062         vq = dev->virtqueue[vring_idx];
1063         if (unlikely(!vq))
1064                 return -1;
1065
1066         inflight_info = vq->inflight_packed;
1067         if (unlikely(!inflight_info))
1068                 return -1;
1069
1070         if (unlikely(head >= vq->size))
1071                 return -1;
1072
1073         desc = vq->desc_packed;
1074         old_free_head = inflight_info->old_free_head;
1075         if (unlikely(old_free_head >= vq->size))
1076                 return -1;
1077
1078         free_head = old_free_head;
1079
1080         /* init header descriptor */
1081         inflight_info->desc[old_free_head].num = 0;
1082         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1083         inflight_info->desc[old_free_head].inflight = 1;
1084
1085         /* save desc entry in flight entry */
1086         while (head != ((last + 1) % vq->size)) {
1087                 inflight_info->desc[old_free_head].num++;
1088                 inflight_info->desc[free_head].addr = desc[head].addr;
1089                 inflight_info->desc[free_head].len = desc[head].len;
1090                 inflight_info->desc[free_head].flags = desc[head].flags;
1091                 inflight_info->desc[free_head].id = desc[head].id;
1092
1093                 inflight_info->desc[old_free_head].last = free_head;
1094                 free_head = inflight_info->desc[free_head].next;
1095                 inflight_info->free_head = free_head;
1096                 head = (head + 1) % vq->size;
1097         }
1098
1099         inflight_info->old_free_head = free_head;
1100         *inflight_entry = old_free_head;
1101
1102         return 0;
1103 }
1104
1105 int
1106 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1107                                   uint16_t last_used_idx, uint16_t idx)
1108 {
1109         struct virtio_net *dev;
1110         struct vhost_virtqueue *vq;
1111
1112         dev = get_device(vid);
1113         if (unlikely(!dev))
1114                 return -1;
1115
1116         if (unlikely(!(dev->protocol_features &
1117             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1118                 return 0;
1119
1120         if (unlikely(vq_is_packed(dev)))
1121                 return -1;
1122
1123         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1124                 return -1;
1125
1126         vq = dev->virtqueue[vring_idx];
1127         if (unlikely(!vq))
1128                 return -1;
1129
1130         if (unlikely(!vq->inflight_split))
1131                 return -1;
1132
1133         if (unlikely(idx >= vq->size))
1134                 return -1;
1135
1136         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1137
1138         vq->inflight_split->desc[idx].inflight = 0;
1139
1140         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1141
1142         vq->inflight_split->used_idx = last_used_idx;
1143         return 0;
1144 }
1145
1146 int
1147 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1148                                    uint16_t head)
1149 {
1150         struct rte_vhost_inflight_info_packed *inflight_info;
1151         struct virtio_net *dev;
1152         struct vhost_virtqueue *vq;
1153
1154         dev = get_device(vid);
1155         if (unlikely(!dev))
1156                 return -1;
1157
1158         if (unlikely(!(dev->protocol_features &
1159             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1160                 return 0;
1161
1162         if (unlikely(!vq_is_packed(dev)))
1163                 return -1;
1164
1165         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1166                 return -1;
1167
1168         vq = dev->virtqueue[vring_idx];
1169         if (unlikely(!vq))
1170                 return -1;
1171
1172         inflight_info = vq->inflight_packed;
1173         if (unlikely(!inflight_info))
1174                 return -1;
1175
1176         if (unlikely(head >= vq->size))
1177                 return -1;
1178
1179         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1180
1181         inflight_info->desc[head].inflight = 0;
1182
1183         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1184
1185         inflight_info->old_free_head = inflight_info->free_head;
1186         inflight_info->old_used_idx = inflight_info->used_idx;
1187         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1188
1189         return 0;
1190 }
1191
1192 int
1193 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1194                                      uint16_t idx)
1195 {
1196         struct virtio_net *dev;
1197         struct vhost_virtqueue *vq;
1198
1199         dev = get_device(vid);
1200         if (unlikely(!dev))
1201                 return -1;
1202
1203         if (unlikely(!(dev->protocol_features &
1204             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1205                 return 0;
1206
1207         if (unlikely(vq_is_packed(dev)))
1208                 return -1;
1209
1210         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1211                 return -1;
1212
1213         vq = dev->virtqueue[vring_idx];
1214         if (unlikely(!vq))
1215                 return -1;
1216
1217         if (unlikely(!vq->inflight_split))
1218                 return -1;
1219
1220         if (unlikely(idx >= vq->size))
1221                 return -1;
1222
1223         vq->inflight_split->last_inflight_io = idx;
1224         return 0;
1225 }
1226
1227 int
1228 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1229                                       uint16_t head)
1230 {
1231         struct rte_vhost_inflight_info_packed *inflight_info;
1232         struct virtio_net *dev;
1233         struct vhost_virtqueue *vq;
1234         uint16_t last;
1235
1236         dev = get_device(vid);
1237         if (unlikely(!dev))
1238                 return -1;
1239
1240         if (unlikely(!(dev->protocol_features &
1241             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1242                 return 0;
1243
1244         if (unlikely(!vq_is_packed(dev)))
1245                 return -1;
1246
1247         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1248                 return -1;
1249
1250         vq = dev->virtqueue[vring_idx];
1251         if (unlikely(!vq))
1252                 return -1;
1253
1254         inflight_info = vq->inflight_packed;
1255         if (unlikely(!inflight_info))
1256                 return -1;
1257
1258         if (unlikely(head >= vq->size))
1259                 return -1;
1260
1261         last = inflight_info->desc[head].last;
1262         if (unlikely(last >= vq->size))
1263                 return -1;
1264
1265         inflight_info->desc[last].next = inflight_info->free_head;
1266         inflight_info->free_head = head;
1267         inflight_info->used_idx += inflight_info->desc[head].num;
1268         if (inflight_info->used_idx >= inflight_info->desc_num) {
1269                 inflight_info->used_idx -= inflight_info->desc_num;
1270                 inflight_info->used_wrap_counter =
1271                         !inflight_info->used_wrap_counter;
1272         }
1273
1274         return 0;
1275 }
1276
1277 int
1278 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1279 {
1280         struct virtio_net *dev;
1281         struct vhost_virtqueue *vq;
1282
1283         dev = get_device(vid);
1284         if (!dev)
1285                 return -1;
1286
1287         if (vring_idx >= VHOST_MAX_VRING)
1288                 return -1;
1289
1290         vq = dev->virtqueue[vring_idx];
1291         if (!vq)
1292                 return -1;
1293
1294         rte_spinlock_lock(&vq->access_lock);
1295
1296         if (vq_is_packed(dev))
1297                 vhost_vring_call_packed(dev, vq);
1298         else
1299                 vhost_vring_call_split(dev, vq);
1300
1301         rte_spinlock_unlock(&vq->access_lock);
1302
1303         return 0;
1304 }
1305
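/*
 * A hedged sketch of the intended call pattern: a backend that fills
 * the used ring itself (outside the builtin net data path) kicks the
 * guest afterwards with this helper.
 *
 *	// ... backend has just updated the used ring of vring_idx ...
 *	if (rte_vhost_vring_call(vid, vring_idx) < 0)
 *		;	// invalid vid or vring index
 */
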
1306 uint16_t
1307 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1308 {
1309         struct virtio_net *dev;
1310         struct vhost_virtqueue *vq;
1311         uint16_t ret = 0;
1312
1313         dev = get_device(vid);
1314         if (!dev)
1315                 return 0;
1316
1317         if (queue_id >= VHOST_MAX_VRING)
1318                 return 0;
1319
1320         vq = dev->virtqueue[queue_id];
1321         if (!vq)
1322                 return 0;
1323
1324         rte_spinlock_lock(&vq->access_lock);
1325
1326         if (unlikely(!vq->enabled || vq->avail == NULL))
1327                 goto out;
1328
1329         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1330
1331 out:
1332         rte_spinlock_unlock(&vq->access_lock);
1333         return ret;
1334 }
1335
1336 static inline int
1337 vhost_enable_notify_split(struct virtio_net *dev,
1338                 struct vhost_virtqueue *vq, int enable)
1339 {
1340         if (vq->used == NULL)
1341                 return -1;
1342
1343         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1344                 if (enable)
1345                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1346                 else
1347                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1348         } else {
1349                 if (enable)
1350                         vhost_avail_event(vq) = vq->last_avail_idx;
1351         }
1352         return 0;
1353 }
1354
1355 static inline int
1356 vhost_enable_notify_packed(struct virtio_net *dev,
1357                 struct vhost_virtqueue *vq, int enable)
1358 {
1359         uint16_t flags;
1360
1361         if (vq->device_event == NULL)
1362                 return -1;
1363
1364         if (!enable) {
1365                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1366                 return 0;
1367         }
1368
1369         flags = VRING_EVENT_F_ENABLE;
1370         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1371                 flags = VRING_EVENT_F_DESC;
1372                 vq->device_event->off_wrap = vq->last_avail_idx |
1373                         vq->avail_wrap_counter << 15;
1374         }
1375
1376         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1377
1378         vq->device_event->flags = flags;
1379         return 0;
1380 }
1381
1382 int
1383 vhost_enable_guest_notification(struct virtio_net *dev,
1384                 struct vhost_virtqueue *vq, int enable)
1385 {
1386         /*
1387          * If the virtqueue is not ready yet, the change will be
1388          * applied when it becomes ready.
1389          */
1390         if (!vq->ready)
1391                 return 0;
1392
1393         if (vq_is_packed(dev))
1394                 return vhost_enable_notify_packed(dev, vq, enable);
1395         else
1396                 return vhost_enable_notify_split(dev, vq, enable);
1397 }
1398
1399 int
1400 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1401 {
1402         struct virtio_net *dev = get_device(vid);
1403         struct vhost_virtqueue *vq;
1404         int ret;
1405
1406         if (!dev)
1407                 return -1;
1408
1409         if (queue_id >= VHOST_MAX_VRING)
1410                 return -1;
1411
1412         vq = dev->virtqueue[queue_id];
1413         if (!vq)
1414                 return -1;
1415
1416         rte_spinlock_lock(&vq->access_lock);
1417
1418         vq->notif_enable = enable;
1419         ret = vhost_enable_guest_notification(dev, vq, enable);
1420
1421         rte_spinlock_unlock(&vq->access_lock);
1422
1423         return ret;
1424 }
1425
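/*
 * A minimal sketch of the usual polling pattern (illustrative only):
 * notifications from the guest are suppressed while the application is
 * busy polling and re-armed before it goes to sleep on the kickfd.
 *
 *	rte_vhost_enable_guest_notification(vid, queue_id, 0);
 *	// ... poll and process the virtqueue ...
 *	rte_vhost_enable_guest_notification(vid, queue_id, 1);
 *	// ... wait for a kick ...
 */
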
1426 void
1427 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1428 {
1429         struct virtio_net *dev = get_device(vid);
1430
1431         if (dev == NULL)
1432                 return;
1433
1434         vhost_log_write(dev, addr, len);
1435 }
1436
1437 void
1438 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1439                          uint64_t offset, uint64_t len)
1440 {
1441         struct virtio_net *dev;
1442         struct vhost_virtqueue *vq;
1443
1444         dev = get_device(vid);
1445         if (dev == NULL)
1446                 return;
1447
1448         if (vring_idx >= VHOST_MAX_VRING)
1449                 return;
1450         vq = dev->virtqueue[vring_idx];
1451         if (!vq)
1452                 return;
1453
1454         vhost_log_used_vring(dev, vq, offset, len);
1455 }
1456
1457 uint32_t
1458 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1459 {
1460         struct virtio_net *dev;
1461         struct vhost_virtqueue *vq;
1462         uint32_t ret = 0;
1463
1464         dev = get_device(vid);
1465         if (dev == NULL)
1466                 return 0;
1467
1468         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1469                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1470                         dev->ifname, __func__, qid);
1471                 return 0;
1472         }
1473
1474         vq = dev->virtqueue[qid];
1475         if (vq == NULL)
1476                 return 0;
1477
1478         rte_spinlock_lock(&vq->access_lock);
1479
1480         if (unlikely(!vq->enabled || vq->avail == NULL))
1481                 goto out;
1482
1483         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1484
1485 out:
1486         rte_spinlock_unlock(&vq->access_lock);
1487         return ret;
1488 }
1489
1490 struct rte_vdpa_device *
1491 rte_vhost_get_vdpa_device(int vid)
1492 {
1493         struct virtio_net *dev = get_device(vid);
1494
1495         if (dev == NULL)
1496                 return NULL;
1497
1498         return dev->vdpa_dev;
1499 }
1500
1501 int
1502 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1503                 uint64_t *log_size)
1504 {
1505         struct virtio_net *dev = get_device(vid);
1506
1507         if (dev == NULL || log_base == NULL || log_size == NULL)
1508                 return -1;
1509
1510         *log_base = dev->log_base;
1511         *log_size = dev->log_size;
1512
1513         return 0;
1514 }
1515
1516 int
1517 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1518                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1519 {
1520         struct vhost_virtqueue *vq;
1521         struct virtio_net *dev = get_device(vid);
1522
1523         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1524                 return -1;
1525
1526         if (queue_id >= VHOST_MAX_VRING)
1527                 return -1;
1528
1529         vq = dev->virtqueue[queue_id];
1530         if (!vq)
1531                 return -1;
1532
1533         if (vq_is_packed(dev)) {
1534                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1535                                   vq->last_avail_idx;
1536                 *last_used_idx = (vq->used_wrap_counter << 15) |
1537                                  vq->last_used_idx;
1538         } else {
1539                 *last_avail_idx = vq->last_avail_idx;
1540                 *last_used_idx = vq->last_used_idx;
1541         }
1542
1543         return 0;
1544 }
1545
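/*
 * For packed rings the wrap counters are folded into bit 15 of the
 * returned indexes, e.g. last_avail_idx = 5 with avail_wrap_counter = 1
 * is reported as 0x8005; rte_vhost_set_vring_base() below decodes the
 * same layout (index = value & 0x7fff, wrap counter = bit 15).
 */
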
1546 int
1547 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1548                 uint16_t last_avail_idx, uint16_t last_used_idx)
1549 {
1550         struct vhost_virtqueue *vq;
1551         struct virtio_net *dev = get_device(vid);
1552
1553         if (!dev)
1554                 return -1;
1555
1556         if (queue_id >= VHOST_MAX_VRING)
1557                 return -1;
1558
1559         vq = dev->virtqueue[queue_id];
1560         if (!vq)
1561                 return -1;
1562
1563         if (vq_is_packed(dev)) {
1564                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1565                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1566                 vq->last_used_idx = last_used_idx & 0x7fff;
1567                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1568         } else {
1569                 vq->last_avail_idx = last_avail_idx;
1570                 vq->last_used_idx = last_used_idx;
1571         }
1572
1573         return 0;
1574 }
1575
1576 int
1577 rte_vhost_get_vring_base_from_inflight(int vid,
1578                                        uint16_t queue_id,
1579                                        uint16_t *last_avail_idx,
1580                                        uint16_t *last_used_idx)
1581 {
1582         struct rte_vhost_inflight_info_packed *inflight_info;
1583         struct vhost_virtqueue *vq;
1584         struct virtio_net *dev = get_device(vid);
1585
1586         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1587                 return -1;
1588
1589         if (queue_id >= VHOST_MAX_VRING)
1590                 return -1;
1591
1592         vq = dev->virtqueue[queue_id];
1593         if (!vq)
1594                 return -1;
1595
1596         if (!vq_is_packed(dev))
1597                 return -1;
1598
1599         inflight_info = vq->inflight_packed;
1600         if (!inflight_info)
1601                 return -1;
1602
1603         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1604                           inflight_info->old_used_idx;
1605         *last_used_idx = *last_avail_idx;
1606
1607         return 0;
1608 }
1609
1610 int
1611 rte_vhost_extern_callback_register(int vid,
1612                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1613 {
1614         struct virtio_net *dev = get_device(vid);
1615
1616         if (dev == NULL || ops == NULL)
1617                 return -1;
1618
1619         dev->extern_ops = *ops;
1620         dev->extern_data = ctx;
1621         return 0;
1622 }
1623
1624 static __rte_always_inline int
1625 async_channel_register(int vid, uint16_t queue_id)
1626 {
1627         struct virtio_net *dev = get_device(vid);
1628         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1629         struct vhost_async *async;
1630         int node = vq->numa_node;
1631
1632         if (unlikely(vq->async)) {
1633                 VHOST_LOG_CONFIG(ERR,
1634                                 "(%s) async register failed: already registered (qid: %d)\n",
1635                                 dev->ifname, queue_id);
1636                 return -1;
1637         }
1638
1639         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1640         if (!async) {
1641                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1642                                 dev->ifname, queue_id);
1643                 return -1;
1644         }
1645
1646         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1647                         RTE_CACHE_LINE_SIZE, node);
1648         if (!async->pkts_info) {
1649                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1650                                 dev->ifname, queue_id);
1651                 goto out_free_async;
1652         }
1653
1654         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1655                         RTE_CACHE_LINE_SIZE, node);
1656         if (!async->pkts_cmpl_flag) {
1657                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1658                                 dev->ifname, queue_id);
1659                 goto out_free_async;
1660         }
1661
1662         if (vq_is_packed(dev)) {
1663                 async->buffers_packed = rte_malloc_socket(NULL,
1664                                 vq->size * sizeof(struct vring_used_elem_packed),
1665                                 RTE_CACHE_LINE_SIZE, node);
1666                 if (!async->buffers_packed) {
1667                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1668                                         dev->ifname, queue_id);
1669                         goto out_free_inflight;
1670                 }
1671         } else {
1672                 async->descs_split = rte_malloc_socket(NULL,
1673                                 vq->size * sizeof(struct vring_used_elem),
1674                                 RTE_CACHE_LINE_SIZE, node);
1675                 if (!async->descs_split) {
1676                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1677                                         dev->ifname, queue_id);
1678                         goto out_free_inflight;
1679                 }
1680         }
1681
1682         vq->async = async;
1683
1684         return 0;
1685 out_free_inflight:
1686         rte_free(async->pkts_info);
1687 out_free_async:
1688         rte_free(async);
1689
1690         return -1;
1691 }
1692
1693 int
1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1695 {
1696         struct vhost_virtqueue *vq;
1697         struct virtio_net *dev = get_device(vid);
1698         int ret;
1699
1700         if (dev == NULL)
1701                 return -1;
1702
1703         if (queue_id >= VHOST_MAX_VRING)
1704                 return -1;
1705
1706         vq = dev->virtqueue[queue_id];
1707
1708         if (unlikely(vq == NULL || !dev->async_copy))
1709                 return -1;
1710
1711         rte_spinlock_lock(&vq->access_lock);
1712         ret = async_channel_register(vid, queue_id);
1713         rte_spinlock_unlock(&vq->access_lock);
1714
1715         return ret;
1716 }
1717
1718 int
1719 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1720 {
1721         struct vhost_virtqueue *vq;
1722         struct virtio_net *dev = get_device(vid);
1723
1724         if (dev == NULL)
1725                 return -1;
1726
1727         if (queue_id >= VHOST_MAX_VRING)
1728                 return -1;
1729
1730         vq = dev->virtqueue[queue_id];
1731
1732         if (unlikely(vq == NULL || !dev->async_copy))
1733                 return -1;
1734
1735         return async_channel_register(vid, queue_id);
1736 }
1737
1738 int
1739 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1740 {
1741         struct vhost_virtqueue *vq;
1742         struct virtio_net *dev = get_device(vid);
1743         int ret = -1;
1744
1745         if (dev == NULL)
1746                 return ret;
1747
1748         if (queue_id >= VHOST_MAX_VRING)
1749                 return ret;
1750
1751         vq = dev->virtqueue[queue_id];
1752
1753         if (vq == NULL)
1754                 return ret;
1755
1756         ret = 0;
1757
1758         if (!vq->async)
1759                 return ret;
1760
1761         if (!rte_spinlock_trylock(&vq->access_lock)) {
1762                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1763                                 dev->ifname);
1764                 return -1;
1765         }
1766
1767         if (vq->async->pkts_inflight_n) {
1768                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1769                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1770                         dev->ifname);
1771                 ret = -1;
1772                 goto out;
1773         }
1774
1775         vhost_free_async_mem(vq);
1776 out:
1777         rte_spinlock_unlock(&vq->access_lock);
1778
1779         return ret;
1780 }
1781
1782 int
1783 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1784 {
1785         struct vhost_virtqueue *vq;
1786         struct virtio_net *dev = get_device(vid);
1787
1788         if (dev == NULL)
1789                 return -1;
1790
1791         if (queue_id >= VHOST_MAX_VRING)
1792                 return -1;
1793
1794         vq = dev->virtqueue[queue_id];
1795
1796         if (vq == NULL)
1797                 return -1;
1798
1799         if (!vq->async)
1800                 return 0;
1801
1802         if (vq->async->pkts_inflight_n) {
1803                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1804                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1805                         dev->ifname);
1806                 return -1;
1807         }
1808
1809         vhost_free_async_mem(vq);
1810
1811         return 0;
1812 }
1813
1814 int
1815 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1816 {
1817         struct rte_dma_info info;
1818         void *pkts_cmpl_flag_addr;
1819         uint16_t max_desc;
1820
1821         if (!rte_dma_is_valid(dma_id)) {
1822                 VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
1823                 return -1;
1824         }
1825
1826         rte_dma_info_get(dma_id, &info);
1827         if (vchan_id >= info.max_vchans) {
1828                 VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1829                 return -1;
1830         }
1831
1832         if (!dma_copy_track[dma_id].vchans) {
1833                 struct async_dma_vchan_info *vchans;
1834
1835                 vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1836                                 RTE_CACHE_LINE_SIZE);
1837                 if (vchans == NULL) {
1838                         VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
1839                                         dma_id, vchan_id);
1840                         return -1;
1841                 }
1842
1843                 dma_copy_track[dma_id].vchans = vchans;
1844         }
1845
1846         if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1847                 VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
1848                                 vchan_id);
1849                 return 0;
1850         }
1851
1852         max_desc = info.max_desc;
1853         if (!rte_is_power_of_2(max_desc))
1854                 max_desc = rte_align32pow2(max_desc);
1855
1856         pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1857         if (!pkts_cmpl_flag_addr) {
1858                 VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
1859                                 "vChannel %u.\n", dma_id, vchan_id);
1860
1861                 if (dma_copy_track[dma_id].nr_vchans == 0) {
1862                         rte_free(dma_copy_track[dma_id].vchans);
1863                         dma_copy_track[dma_id].vchans = NULL;
1864                 }
1865                 return -1;
1866         }
1867
1868         dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1869         dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1870         dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1871         dma_copy_track[dma_id].nr_vchans++;
1872
1873         return 0;
1874 }
1875
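/*
 * A hedged setup sketch (it assumes the vhost socket was registered
 * with async copy enabled, e.g. the RTE_VHOST_USER_ASYNC_COPY flag, and
 * that the dmadev identified by dma_id was configured and started
 * through the rte_dma_* API beforehand):
 *
 *	if (rte_vhost_async_dma_configure(dma_id, 0) < 0)
 *		return -1;
 *	if (rte_vhost_async_channel_register(vid, queue_id) < 0)
 *		return -1;
 *	// the async data path can now use DMA vchannel 0 for this vring
 */
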
1876 int
1877 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1878 {
1879         struct vhost_virtqueue *vq;
1880         struct virtio_net *dev = get_device(vid);
1881         int ret = -1;
1882
1883         if (dev == NULL)
1884                 return ret;
1885
1886         if (queue_id >= VHOST_MAX_VRING)
1887                 return ret;
1888
1889         vq = dev->virtqueue[queue_id];
1890
1891         if (vq == NULL)
1892                 return ret;
1893
1894         if (!vq->async)
1895                 return ret;
1896
1897         if (!rte_spinlock_trylock(&vq->access_lock)) {
1898                 VHOST_LOG_CONFIG(DEBUG,
1899                         "(%s) failed to check in-flight packets. virtqueue busy.\n",
1900                         dev->ifname);
1901                 return ret;
1902         }
1903
1904         ret = vq->async->pkts_inflight_n;
1905         rte_spinlock_unlock(&vq->access_lock);
1906
1907         return ret;
1908 }
1909
1910 int
1911 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
1912 {
1913         struct vhost_virtqueue *vq;
1914         struct virtio_net *dev = get_device(vid);
1915         int ret = -1;
1916
1917         if (dev == NULL)
1918                 return ret;
1919
1920         if (queue_id >= VHOST_MAX_VRING)
1921                 return ret;
1922
1923         vq = dev->virtqueue[queue_id];
1924
1925         if (vq == NULL)
1926                 return ret;
1927
1928         if (!vq->async)
1929                 return ret;
1930
1931         ret = vq->async->pkts_inflight_n;
1932
1933         return ret;
1934 }
1935
1936 int
1937 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1938                 struct rte_vhost_power_monitor_cond *pmc)
1939 {
1940         struct virtio_net *dev = get_device(vid);
1941         struct vhost_virtqueue *vq;
1942
1943         if (dev == NULL)
1944                 return -1;
1945         if (queue_id >= VHOST_MAX_VRING)
1946                 return -1;
1947
1948         vq = dev->virtqueue[queue_id];
1949         if (vq == NULL)
1950                 return -1;
1951
1952         if (vq_is_packed(dev)) {
1953                 struct vring_packed_desc *desc;
1954                 desc = vq->desc_packed;
1955                 pmc->addr = &desc[vq->last_avail_idx].flags;
1956                 if (vq->avail_wrap_counter)
1957                         pmc->val = VRING_DESC_F_AVAIL;
1958                 else
1959                         pmc->val = VRING_DESC_F_USED;
1960                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1961                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1962                 pmc->match = 1;
1963         } else {
1964                 pmc->addr = &vq->avail->idx;
1965                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1966                 pmc->mask = vq->size - 1;
1967                 pmc->size = sizeof(vq->avail->idx);
1968                 pmc->match = 0;
1969         }
1970
1971         return 0;
1972 }
1973
1974 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1975 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);