vhost: support per-virtqueue statistics
[dpdk.git] / lib / vhost / vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
24 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
25 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
27 struct vhost_vq_stats_name_off {
28         char name[RTE_VHOST_STATS_NAME_SIZE];
29         unsigned int offset;
30 };
31
32 static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
33         {"good_packets",           offsetof(struct vhost_virtqueue, stats.packets)},
34         {"good_bytes",             offsetof(struct vhost_virtqueue, stats.bytes)},
35         {"multicast_packets",      offsetof(struct vhost_virtqueue, stats.multicast)},
36         {"broadcast_packets",      offsetof(struct vhost_virtqueue, stats.broadcast)},
37         {"undersize_packets",      offsetof(struct vhost_virtqueue, stats.size_bins[0])},
38         {"size_64_packets",        offsetof(struct vhost_virtqueue, stats.size_bins[1])},
39         {"size_65_127_packets",    offsetof(struct vhost_virtqueue, stats.size_bins[2])},
40         {"size_128_255_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[3])},
41         {"size_256_511_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[4])},
42         {"size_512_1023_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[5])},
43         {"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
44         {"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
45 };
46
47 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
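
/*
 * Illustrative sketch, not part of the original file: how an application
 * might dump the per-virtqueue counters named in the table above. It
 * assumes the rte_vhost_vring_stats_get_names()/rte_vhost_vring_stats_get()
 * accessors introduced together with this table, a device registered with
 * statistics enabled (e.g. the RTE_VHOST_USER_NET_STATS_ENABLE flag), and
 * <stdio.h>, <stdlib.h> and <inttypes.h>. Check rte_vhost.h for the exact
 * prototypes and return-value semantics.
 */
static void
example_dump_vq_stats(int vid, uint16_t queue_id)
{
	struct rte_vhost_stat_name *names = NULL;
	struct rte_vhost_stat *stats = NULL;
	int count, i;

	/* Called with a NULL buffer, the API reports how many stats exist. */
	count = rte_vhost_vring_stats_get_names(vid, queue_id, NULL, 0);
	if (count <= 0)
		return;

	names = calloc(count, sizeof(*names));
	stats = calloc(count, sizeof(*stats));
	if (names == NULL || stats == NULL)
		goto out;

	if (rte_vhost_vring_stats_get_names(vid, queue_id, names, count) != count)
		goto out;
	if (rte_vhost_vring_stats_get(vid, queue_id, stats, count) != count)
		goto out;

	for (i = 0; i < count; i++)
		printf("vq %u %s: %" PRIu64 "\n", (unsigned int)queue_id,
			names[stats[i].id].name, stats[i].value);
out:
	free(names);
	free(stats);
}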
48
49 /* Called with iotlb_lock read-locked */
50 uint64_t
51 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
52                     uint64_t iova, uint64_t *size, uint8_t perm)
53 {
54         uint64_t vva, tmp_size;
55
56         if (unlikely(!*size))
57                 return 0;
58
59         tmp_size = *size;
60
61         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
62         if (tmp_size == *size)
63                 return vva;
64
65         iova += tmp_size;
66
67         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
68                 /*
69                  * iotlb_lock is read-locked for a full burst,
70                  * but it only protects the iotlb cache.
71                  * In case of IOTLB miss, we might block on the socket,
72                  * which could cause a deadlock with QEMU if an IOTLB update
73                  * is being handled. We can safely unlock here to avoid it.
74                  */
75                 vhost_user_iotlb_rd_unlock(vq);
76
77                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
78                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
79                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
80                                 dev->ifname, iova);
81                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
82                 }
83
84                 vhost_user_iotlb_rd_lock(vq);
85         }
86
87         return 0;
88 }
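
/*
 * Illustrative sketch, not part of the original file: the locking pattern
 * expected of callers. The data path takes the IOTLB read lock once for a
 * whole burst and keeps it across translations; __vhost_iova_to_vva() only
 * drops and re-takes it on an IOTLB miss, as explained in the comment above.
 * The helpers used here (vhost_user_iotlb_rd_lock()/_unlock() from iotlb.h
 * and the vhost_iova_to_vva() wrapper from vhost.h) are the ones this
 * library already provides.
 */
static inline void
example_translate_burst(struct virtio_net *dev, struct vhost_virtqueue *vq,
		const uint64_t *iovas, uint64_t *hvas, uint32_t count, uint64_t len)
{
	uint32_t i;

	vhost_user_iotlb_rd_lock(vq);
	for (i = 0; i < count; i++) {
		uint64_t size = len;

		/* Returns 0 (and posts an IOTLB miss request) if unmapped. */
		hvas[i] = vhost_iova_to_vva(dev, vq, iovas[i], &size,
				VHOST_ACCESS_RO);
	}
	vhost_user_iotlb_rd_unlock(vq);
}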
89
90 #define VHOST_LOG_PAGE  4096
91
92 /*
93  * Atomically set a bit in memory.
94  */
95 static __rte_always_inline void
96 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
97 {
98 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
99         /*
100          * __sync_ builtins are deprecated, but __atomic_ ones
101          * generate sub-optimal code in older GCC versions.
102          */
103         __sync_fetch_and_or_1(addr, (1U << nr));
104 #else
105         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
106 #endif
107 }
108
109 static __rte_always_inline void
110 vhost_log_page(uint8_t *log_base, uint64_t page)
111 {
112         vhost_set_bit(page % 8, &log_base[page / 8]);
113 }
114
115 void
116 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
117 {
118         uint64_t page;
119
120         if (unlikely(!dev->log_base || !len))
121                 return;
122
123         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
124                 return;
125
126         /* To make sure guest memory updates are committed before logging */
127         rte_atomic_thread_fence(__ATOMIC_RELEASE);
128
129         page = addr / VHOST_LOG_PAGE;
130         while (page * VHOST_LOG_PAGE < addr + len) {
131                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
132                 page += 1;
133         }
134 }
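
/*
 * Worked example, not part of the original file: with VHOST_LOG_PAGE = 4096,
 * a write of len = 0x2000 bytes at addr = 0x2003 spans guest-physical pages
 * 2 to 4 (0x2003 / 0x1000 = 2 up to 0x4002 / 0x1000 = 4). For each page the
 * loop above sets bit (page % 8) of byte (page / 8) in the dirty log, i.e.
 * bits 2, 3 and 4 of log_base[0].
 */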
135
136 void
137 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
138                              uint64_t iova, uint64_t len)
139 {
140         uint64_t hva, gpa, map_len;
141         map_len = len;
142
143         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
144         if (map_len != len) {
145                 VHOST_LOG_DATA(ERR,
146                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
147                         dev->ifname, iova);
148                 return;
149         }
150
151         gpa = hva_to_gpa(dev, hva, len);
152         if (gpa)
153                 __vhost_log_write(dev, gpa, len);
154 }
155
156 void
157 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
158 {
159         unsigned long *log_base;
160         int i;
161
162         if (unlikely(!dev->log_base))
163                 return;
164
165         /* No cache, nothing to sync */
166         if (unlikely(!vq->log_cache))
167                 return;
168
169         rte_atomic_thread_fence(__ATOMIC_RELEASE);
170
171         log_base = (unsigned long *)(uintptr_t)dev->log_base;
172
173         for (i = 0; i < vq->log_cache_nb_elem; i++) {
174                 struct log_cache_entry *elem = vq->log_cache + i;
175
176 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
177                 /*
178                  * '__sync' builtins are deprecated, but '__atomic' ones
179                  * generate sub-optimal code in older GCC versions.
180                  */
181                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
182 #else
183                 __atomic_fetch_or(log_base + elem->offset, elem->val,
184                                 __ATOMIC_RELAXED);
185 #endif
186         }
187
188         rte_atomic_thread_fence(__ATOMIC_RELEASE);
189
190         vq->log_cache_nb_elem = 0;
191 }
192
193 static __rte_always_inline void
194 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
195                         uint64_t page)
196 {
197         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
198         uint32_t offset = page / (sizeof(unsigned long) << 3);
199         int i;
200
201         if (unlikely(!vq->log_cache)) {
202                 /* No logging cache allocated, write dirty log map directly */
203                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
204                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
205
206                 return;
207         }
208
209         for (i = 0; i < vq->log_cache_nb_elem; i++) {
210                 struct log_cache_entry *elem = vq->log_cache + i;
211
212                 if (elem->offset == offset) {
213                         elem->val |= (1UL << bit_nr);
214                         return;
215                 }
216         }
217
218         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
219                 /*
220                  * No more room for a new log cache entry,
221                  * so write the dirty log map directly.
222                  */
223                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
224                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
225
226                 return;
227         }
228
229         vq->log_cache[i].offset = offset;
230         vq->log_cache[i].val = (1UL << bit_nr);
231         vq->log_cache_nb_elem++;
232 }
233
234 void
235 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
236                         uint64_t addr, uint64_t len)
237 {
238         uint64_t page;
239
240         if (unlikely(!dev->log_base || !len))
241                 return;
242
243         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
244                 return;
245
246         page = addr / VHOST_LOG_PAGE;
247         while (page * VHOST_LOG_PAGE < addr + len) {
248                 vhost_log_cache_page(dev, vq, page);
249                 page += 1;
250         }
251 }
252
253 void
254 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
255                              uint64_t iova, uint64_t len)
256 {
257         uint64_t hva, gpa, map_len;
258         map_len = len;
259
260         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
261         if (map_len != len) {
262                 VHOST_LOG_DATA(ERR,
263                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
264                         dev->ifname, iova);
265                 return;
266         }
267
268         gpa = hva_to_gpa(dev, hva, len);
269         if (gpa)
270                 __vhost_log_cache_write(dev, vq, gpa, len);
271 }
272
273 void *
274 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
275                 uint64_t desc_addr, uint64_t desc_len)
276 {
277         void *idesc;
278         uint64_t src, dst;
279         uint64_t len, remain = desc_len;
280
281         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
282         if (unlikely(!idesc))
283                 return NULL;
284
285         dst = (uint64_t)(uintptr_t)idesc;
286
287         while (remain) {
288                 len = remain;
289                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
290                                 VHOST_ACCESS_RO);
291                 if (unlikely(!src || !len)) {
292                         rte_free(idesc);
293                         return NULL;
294                 }
295
296                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
297
298                 remain -= len;
299                 dst += len;
300                 desc_addr += len;
301         }
302
303         return idesc;
304 }
305
306 void
307 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
308 {
309         if ((vq->callfd >= 0) && (destroy != 0))
310                 close(vq->callfd);
311         if (vq->kickfd >= 0)
312                 close(vq->kickfd);
313 }
314
315 void
316 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
317 {
318         if (!(dev->protocol_features &
319             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
320                 return;
321
322         if (vq_is_packed(dev)) {
323                 if (vq->inflight_packed)
324                         vq->inflight_packed = NULL;
325         } else {
326                 if (vq->inflight_split)
327                         vq->inflight_split = NULL;
328         }
329
330         if (vq->resubmit_inflight) {
331                 if (vq->resubmit_inflight->resubmit_list) {
332                         rte_free(vq->resubmit_inflight->resubmit_list);
333                         vq->resubmit_inflight->resubmit_list = NULL;
334                 }
335                 rte_free(vq->resubmit_inflight);
336                 vq->resubmit_inflight = NULL;
337         }
338 }
339
340 /*
341  * Unmap any memory, close any file descriptors and
342  * free any memory owned by a device.
343  */
344 void
345 cleanup_device(struct virtio_net *dev, int destroy)
346 {
347         uint32_t i;
348
349         vhost_backend_cleanup(dev);
350
351         for (i = 0; i < dev->nr_vring; i++) {
352                 cleanup_vq(dev->virtqueue[i], destroy);
353                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
354         }
355 }
356
357 static void
358 vhost_free_async_mem(struct vhost_virtqueue *vq)
359 {
360         if (!vq->async)
361                 return;
362
363         rte_free(vq->async->pkts_info);
364         rte_free(vq->async->pkts_cmpl_flag);
365
366         rte_free(vq->async->buffers_packed);
367         vq->async->buffers_packed = NULL;
368         rte_free(vq->async->descs_split);
369         vq->async->descs_split = NULL;
370
371         rte_free(vq->async);
372         vq->async = NULL;
373 }
374
375 void
376 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
377 {
378         if (vq_is_packed(dev))
379                 rte_free(vq->shadow_used_packed);
380         else
381                 rte_free(vq->shadow_used_split);
382
383         vhost_free_async_mem(vq);
384         rte_free(vq->batch_copy_elems);
385         rte_mempool_free(vq->iotlb_pool);
386         rte_free(vq->log_cache);
387         rte_free(vq);
388 }
389
390 /*
391  * Release virtqueues and device memory.
392  */
393 static void
394 free_device(struct virtio_net *dev)
395 {
396         uint32_t i;
397
398         for (i = 0; i < dev->nr_vring; i++)
399                 free_vq(dev, dev->virtqueue[i]);
400
401         rte_free(dev);
402 }
403
404 static __rte_always_inline int
405 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
406 {
407         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
408                 return 0;
409
410         vq->log_guest_addr = translate_log_addr(dev, vq,
411                                                 vq->ring_addrs.log_guest_addr);
412         if (vq->log_guest_addr == 0)
413                 return -1;
414
415         return 0;
416 }
417
418 /*
419  * Converts a vring log address to a GPA.
420  * If an IOMMU is enabled, the log address is an IOVA;
421  * if not, the log address is already a GPA.
422  *
423  * Caller should have iotlb_lock read-locked
424  */
425 uint64_t
426 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
427                 uint64_t log_addr)
428 {
429         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
430                 const uint64_t exp_size = sizeof(uint64_t);
431                 uint64_t hva, gpa;
432                 uint64_t size = exp_size;
433
434                 hva = vhost_iova_to_vva(dev, vq, log_addr,
435                                         &size, VHOST_ACCESS_RW);
436
437                 if (size != exp_size)
438                         return 0;
439
440                 gpa = hva_to_gpa(dev, hva, exp_size);
441                 if (!gpa) {
442                         VHOST_LOG_DATA(ERR,
443                                 "(%s) failed to find GPA for log_addr: 0x%"
444                                 PRIx64 " hva: 0x%" PRIx64 "\n",
445                                 dev->ifname, log_addr, hva);
446                         return 0;
447                 }
448                 return gpa;
449
450         } else
451                 return log_addr;
452 }
453
454 /* Caller should have iotlb_lock read-locked */
455 static int
456 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
457 {
458         uint64_t req_size, size;
459
460         req_size = sizeof(struct vring_desc) * vq->size;
461         size = req_size;
462         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
463                                                 vq->ring_addrs.desc_user_addr,
464                                                 &size, VHOST_ACCESS_RW);
465         if (!vq->desc || size != req_size)
466                 return -1;
467
468         req_size = sizeof(struct vring_avail);
469         req_size += sizeof(uint16_t) * vq->size;
470         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
471                 req_size += sizeof(uint16_t);
472         size = req_size;
473         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
474                                                 vq->ring_addrs.avail_user_addr,
475                                                 &size, VHOST_ACCESS_RW);
476         if (!vq->avail || size != req_size)
477                 return -1;
478
479         req_size = sizeof(struct vring_used);
480         req_size += sizeof(struct vring_used_elem) * vq->size;
481         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
482                 req_size += sizeof(uint16_t);
483         size = req_size;
484         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
485                                                 vq->ring_addrs.used_user_addr,
486                                                 &size, VHOST_ACCESS_RW);
487         if (!vq->used || size != req_size)
488                 return -1;
489
490         return 0;
491 }
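
/*
 * Worked example, not part of the original file: for a split ring with
 * vq->size = 256 and VIRTIO_RING_F_EVENT_IDX negotiated, the sizes requested
 * above are 16 * 256 = 4096 bytes for the descriptor table,
 * 4 + 2 * 256 + 2 = 518 bytes for the avail ring (flags, idx, ring[],
 * used_event) and 4 + 8 * 256 + 2 = 2054 bytes for the used ring. Each area
 * must be mapped contiguously in the IOTLB; otherwise the returned size is
 * shorter than requested and the translation fails.
 */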
492
493 /* Caller should have iotlb_lock read-locked */
494 static int
495 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
496 {
497         uint64_t req_size, size;
498
499         req_size = sizeof(struct vring_packed_desc) * vq->size;
500         size = req_size;
501         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
502                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
503                                 &size, VHOST_ACCESS_RW);
504         if (!vq->desc_packed || size != req_size)
505                 return -1;
506
507         req_size = sizeof(struct vring_packed_desc_event);
508         size = req_size;
509         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
510                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
511                                 &size, VHOST_ACCESS_RW);
512         if (!vq->driver_event || size != req_size)
513                 return -1;
514
515         req_size = sizeof(struct vring_packed_desc_event);
516         size = req_size;
517         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
518                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
519                                 &size, VHOST_ACCESS_RW);
520         if (!vq->device_event || size != req_size)
521                 return -1;
522
523         return 0;
524 }
525
526 int
527 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
528 {
529
530         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
531                 goto out;
532
533         if (vq_is_packed(dev)) {
534                 if (vring_translate_packed(dev, vq) < 0)
535                         return -1;
536         } else {
537                 if (vring_translate_split(dev, vq) < 0)
538                         return -1;
539         }
540
541         if (log_translate(dev, vq) < 0)
542                 return -1;
543
544 out:
	vq->access_ok = true;
545
546         return 0;
547 }
548
549 void
550 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
551 {
552         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
553                 vhost_user_iotlb_wr_lock(vq);
554
555         vq->access_ok = false;
556         vq->desc = NULL;
557         vq->avail = NULL;
558         vq->used = NULL;
559         vq->log_guest_addr = 0;
560
561         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
562                 vhost_user_iotlb_wr_unlock(vq);
563 }
564
565 static void
566 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
567 {
568         struct vhost_virtqueue *vq;
569         int numa_node = SOCKET_ID_ANY;
570
571         if (vring_idx >= VHOST_MAX_VRING) {
572                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
573                                 dev->ifname, vring_idx);
574                 return;
575         }
576
577         vq = dev->virtqueue[vring_idx];
578         if (!vq) {
579                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
580                                 dev->ifname, vring_idx);
581                 return;
582         }
583
584         memset(vq, 0, sizeof(struct vhost_virtqueue));
585
586         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
587         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
588         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
589
590 #ifdef RTE_LIBRTE_VHOST_NUMA
591         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
592                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
593                         dev->ifname, rte_strerror(errno));
594                 numa_node = SOCKET_ID_ANY;
595         }
596 #endif
597         vq->numa_node = numa_node;
598
599         vhost_user_iotlb_init(dev, vring_idx);
600 }
601
602 static void
603 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
604 {
605         struct vhost_virtqueue *vq;
606         int callfd;
607
608         if (vring_idx >= VHOST_MAX_VRING) {
609                 VHOST_LOG_CONFIG(ERR,
610                                 "(%s) failed to reset vring, out of bound (%d)\n",
611                                 dev->ifname, vring_idx);
612                 return;
613         }
614
615         vq = dev->virtqueue[vring_idx];
616         if (!vq) {
617                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
618                                 dev->ifname, vring_idx);
619                 return;
620         }
621
622         callfd = vq->callfd;
623         init_vring_queue(dev, vring_idx);
624         vq->callfd = callfd;
625 }
626
627 int
628 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
629 {
630         struct vhost_virtqueue *vq;
631         uint32_t i;
632
633         /* Also allocate holes, if any, up to requested vring index. */
634         for (i = 0; i <= vring_idx; i++) {
635                 if (dev->virtqueue[i])
636                         continue;
637
638                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
639                 if (vq == NULL) {
640                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
641                                         dev->ifname, i);
642                         return -1;
643                 }
644
645                 dev->virtqueue[i] = vq;
646                 init_vring_queue(dev, i);
647                 rte_spinlock_init(&vq->access_lock);
648                 vq->avail_wrap_counter = 1;
649                 vq->used_wrap_counter = 1;
650                 vq->signalled_used_valid = false;
651         }
652
653         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
654
655         return 0;
656 }
657
658 /*
659  * Reset some variables in the device structure, while keeping a few
660  * others untouched, such as vid, ifname and nr_vring: they
661  * should remain the same unless the device is removed.
662  */
663 void
664 reset_device(struct virtio_net *dev)
665 {
666         uint32_t i;
667
668         dev->features = 0;
669         dev->protocol_features = 0;
670         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
671
672         for (i = 0; i < dev->nr_vring; i++)
673                 reset_vring_queue(dev, i);
674 }
675
676 /*
677  * Invoked when a new vhost-user connection is established (when
678  * a new virtio device is being attached).
679  */
680 int
681 vhost_new_device(void)
682 {
683         struct virtio_net *dev;
684         int i;
685
686         pthread_mutex_lock(&vhost_dev_lock);
687         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
688                 if (vhost_devices[i] == NULL)
689                         break;
690         }
691
692         if (i == RTE_MAX_VHOST_DEVICE) {
693                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
694                 pthread_mutex_unlock(&vhost_dev_lock);
695                 return -1;
696         }
697
698         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
699         if (dev == NULL) {
700                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
701                 pthread_mutex_unlock(&vhost_dev_lock);
702                 return -1;
703         }
704
705         vhost_devices[i] = dev;
706         pthread_mutex_unlock(&vhost_dev_lock);
707
708         dev->vid = i;
709         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
710         dev->slave_req_fd = -1;
711         dev->postcopy_ufd = -1;
712         rte_spinlock_init(&dev->slave_req_lock);
713
714         return i;
715 }
716
717 void
718 vhost_destroy_device_notify(struct virtio_net *dev)
719 {
720         struct rte_vdpa_device *vdpa_dev;
721
722         if (dev->flags & VIRTIO_DEV_RUNNING) {
723                 vdpa_dev = dev->vdpa_dev;
724                 if (vdpa_dev)
725                         vdpa_dev->ops->dev_close(dev->vid);
726                 dev->flags &= ~VIRTIO_DEV_RUNNING;
727                 dev->notify_ops->destroy_device(dev->vid);
728         }
729 }
730
731 /*
732  * Invoked when the vhost-user connection is broken (when
733  * the virtio device is being detached).
734  */
735 void
736 vhost_destroy_device(int vid)
737 {
738         struct virtio_net *dev = get_device(vid);
739
740         if (dev == NULL)
741                 return;
742
743         vhost_destroy_device_notify(dev);
744
745         cleanup_device(dev, 1);
746         free_device(dev);
747
748         vhost_devices[vid] = NULL;
749 }
750
751 void
752 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
753 {
754         struct virtio_net *dev = get_device(vid);
755
756         if (dev == NULL)
757                 return;
758
759         dev->vdpa_dev = vdpa_dev;
760 }
761
762 void
763 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
764 {
765         struct virtio_net *dev;
766         unsigned int len;
767
768         dev = get_device(vid);
769         if (dev == NULL)
770                 return;
771
772         len = if_len > sizeof(dev->ifname) ?
773                 sizeof(dev->ifname) : if_len;
774
775         strncpy(dev->ifname, if_name, len);
776         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
777 }
778
779 void
780 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags, bool stats_enabled)
781 {
782         struct virtio_net *dev = get_device(vid);
783
784         if (dev == NULL)
785                 return;
786
787         if (enable)
788                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
789         else
790                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
791         if (!compliant_ol_flags)
792                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
793         else
794                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
795         if (stats_enabled)
796                 dev->flags |= VIRTIO_DEV_STATS_ENABLED;
797         else
798                 dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
799 }
800
801 void
802 vhost_enable_extbuf(int vid)
803 {
804         struct virtio_net *dev = get_device(vid);
805
806         if (dev == NULL)
807                 return;
808
809         dev->extbuf = 1;
810 }
811
812 void
813 vhost_enable_linearbuf(int vid)
814 {
815         struct virtio_net *dev = get_device(vid);
816
817         if (dev == NULL)
818                 return;
819
820         dev->linearbuf = 1;
821 }
822
823 int
824 rte_vhost_get_mtu(int vid, uint16_t *mtu)
825 {
826         struct virtio_net *dev = get_device(vid);
827
828         if (dev == NULL || mtu == NULL)
829                 return -ENODEV;
830
831         if (!(dev->flags & VIRTIO_DEV_READY))
832                 return -EAGAIN;
833
834         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
835                 return -ENOTSUP;
836
837         *mtu = dev->mtu;
838
839         return 0;
840 }
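
/*
 * Illustrative sketch, not part of the original file: rte_vhost_get_mtu()
 * returns -EAGAIN while feature negotiation is still in progress, so a
 * typical caller retries later (for instance from its new_device callback,
 * where the device is guaranteed to be ready). The 1500-byte fallback below
 * is an arbitrary example value.
 */
static int
example_get_mtu(int vid, uint16_t *mtu)
{
	int ret = rte_vhost_get_mtu(vid, mtu);

	if (ret == -EAGAIN)
		return ret; /* negotiation not finished yet, try again later */

	if (ret == -ENOTSUP) {
		*mtu = 1500; /* VIRTIO_NET_F_MTU not negotiated, use a default */
		return 0;
	}

	return ret;
}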
841
842 int
843 rte_vhost_get_numa_node(int vid)
844 {
845 #ifdef RTE_LIBRTE_VHOST_NUMA
846         struct virtio_net *dev = get_device(vid);
847         int numa_node;
848         int ret;
849
850         if (dev == NULL || numa_available() != 0)
851                 return -1;
852
853         ret = get_mempolicy(&numa_node, NULL, 0, dev,
854                             MPOL_F_NODE | MPOL_F_ADDR);
855         if (ret < 0) {
856                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
857                         dev->ifname, rte_strerror(errno));
858                 return -1;
859         }
860
861         return numa_node;
862 #else
863         RTE_SET_USED(vid);
864         return -1;
865 #endif
866 }
867
868 uint32_t
869 rte_vhost_get_queue_num(int vid)
870 {
871         struct virtio_net *dev = get_device(vid);
872
873         if (dev == NULL)
874                 return 0;
875
876         return dev->nr_vring / 2;
877 }
878
879 uint16_t
880 rte_vhost_get_vring_num(int vid)
881 {
882         struct virtio_net *dev = get_device(vid);
883
884         if (dev == NULL)
885                 return 0;
886
887         return dev->nr_vring;
888 }
889
890 int
891 rte_vhost_get_ifname(int vid, char *buf, size_t len)
892 {
893         struct virtio_net *dev = get_device(vid);
894
895         if (dev == NULL || buf == NULL)
896                 return -1;
897
898         len = RTE_MIN(len, sizeof(dev->ifname));
899
900         strncpy(buf, dev->ifname, len);
901         buf[len - 1] = '\0';
902
903         return 0;
904 }
905
906 int
907 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
908 {
909         struct virtio_net *dev;
910
911         dev = get_device(vid);
912         if (dev == NULL || features == NULL)
913                 return -1;
914
915         *features = dev->features;
916         return 0;
917 }
918
919 int
920 rte_vhost_get_negotiated_protocol_features(int vid,
921                                            uint64_t *protocol_features)
922 {
923         struct virtio_net *dev;
924
925         dev = get_device(vid);
926         if (dev == NULL || protocol_features == NULL)
927                 return -1;
928
929         *protocol_features = dev->protocol_features;
930         return 0;
931 }
932
933 int
934 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
935 {
936         struct virtio_net *dev;
937         struct rte_vhost_memory *m;
938         size_t size;
939
940         dev = get_device(vid);
941         if (dev == NULL || mem == NULL)
942                 return -1;
943
944         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
945         m = malloc(sizeof(struct rte_vhost_memory) + size);
946         if (!m)
947                 return -1;
948
949         m->nregions = dev->mem->nregions;
950         memcpy(m->regions, dev->mem->regions, size);
951         *mem = m;
952
953         return 0;
954 }
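
/*
 * Illustrative sketch, not part of the original file: the table returned by
 * rte_vhost_get_mem_table() is a malloc()'ed snapshot that the caller must
 * free(). The region fields used below (guest_phys_addr, host_user_addr,
 * size) are those of struct rte_vhost_mem_region in rte_vhost.h; <stdio.h>
 * is assumed for printf().
 */
static void
example_walk_mem_table(int vid)
{
	struct rte_vhost_memory *mem = NULL;
	uint32_t i;

	if (rte_vhost_get_mem_table(vid, &mem) < 0)
		return;

	for (i = 0; i < mem->nregions; i++) {
		struct rte_vhost_mem_region *reg = &mem->regions[i];

		printf("region %u: GPA 0x%" PRIx64 " HVA 0x%" PRIx64
			" size 0x%" PRIx64 "\n",
			i, reg->guest_phys_addr, reg->host_user_addr, reg->size);
	}

	free(mem);
}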
955
956 int
957 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
958                           struct rte_vhost_vring *vring)
959 {
960         struct virtio_net *dev;
961         struct vhost_virtqueue *vq;
962
963         dev = get_device(vid);
964         if (dev == NULL || vring == NULL)
965                 return -1;
966
967         if (vring_idx >= VHOST_MAX_VRING)
968                 return -1;
969
970         vq = dev->virtqueue[vring_idx];
971         if (!vq)
972                 return -1;
973
974         if (vq_is_packed(dev)) {
975                 vring->desc_packed = vq->desc_packed;
976                 vring->driver_event = vq->driver_event;
977                 vring->device_event = vq->device_event;
978         } else {
979                 vring->desc = vq->desc;
980                 vring->avail = vq->avail;
981                 vring->used = vq->used;
982         }
983         vring->log_guest_addr  = vq->log_guest_addr;
984
985         vring->callfd  = vq->callfd;
986         vring->kickfd  = vq->kickfd;
987         vring->size    = vq->size;
988
989         return 0;
990 }
991
992 int
993 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
994                                   struct rte_vhost_ring_inflight *vring)
995 {
996         struct virtio_net *dev;
997         struct vhost_virtqueue *vq;
998
999         dev = get_device(vid);
1000         if (unlikely(!dev))
1001                 return -1;
1002
1003         if (vring_idx >= VHOST_MAX_VRING)
1004                 return -1;
1005
1006         vq = dev->virtqueue[vring_idx];
1007         if (unlikely(!vq))
1008                 return -1;
1009
1010         if (vq_is_packed(dev)) {
1011                 if (unlikely(!vq->inflight_packed))
1012                         return -1;
1013
1014                 vring->inflight_packed = vq->inflight_packed;
1015         } else {
1016                 if (unlikely(!vq->inflight_split))
1017                         return -1;
1018
1019                 vring->inflight_split = vq->inflight_split;
1020         }
1021
1022         vring->resubmit_inflight = vq->resubmit_inflight;
1023
1024         return 0;
1025 }
1026
1027 int
1028 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1029                                   uint16_t idx)
1030 {
1031         struct vhost_virtqueue *vq;
1032         struct virtio_net *dev;
1033
1034         dev = get_device(vid);
1035         if (unlikely(!dev))
1036                 return -1;
1037
1038         if (unlikely(!(dev->protocol_features &
1039             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1040                 return 0;
1041
1042         if (unlikely(vq_is_packed(dev)))
1043                 return -1;
1044
1045         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1046                 return -1;
1047
1048         vq = dev->virtqueue[vring_idx];
1049         if (unlikely(!vq))
1050                 return -1;
1051
1052         if (unlikely(!vq->inflight_split))
1053                 return -1;
1054
1055         if (unlikely(idx >= vq->size))
1056                 return -1;
1057
1058         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1059         vq->inflight_split->desc[idx].inflight = 1;
1060         return 0;
1061 }
1062
1063 int
1064 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1065                                    uint16_t head, uint16_t last,
1066                                    uint16_t *inflight_entry)
1067 {
1068         struct rte_vhost_inflight_info_packed *inflight_info;
1069         struct virtio_net *dev;
1070         struct vhost_virtqueue *vq;
1071         struct vring_packed_desc *desc;
1072         uint16_t old_free_head, free_head;
1073
1074         dev = get_device(vid);
1075         if (unlikely(!dev))
1076                 return -1;
1077
1078         if (unlikely(!(dev->protocol_features &
1079             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1080                 return 0;
1081
1082         if (unlikely(!vq_is_packed(dev)))
1083                 return -1;
1084
1085         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1086                 return -1;
1087
1088         vq = dev->virtqueue[vring_idx];
1089         if (unlikely(!vq))
1090                 return -1;
1091
1092         inflight_info = vq->inflight_packed;
1093         if (unlikely(!inflight_info))
1094                 return -1;
1095
1096         if (unlikely(head >= vq->size))
1097                 return -1;
1098
1099         desc = vq->desc_packed;
1100         old_free_head = inflight_info->old_free_head;
1101         if (unlikely(old_free_head >= vq->size))
1102                 return -1;
1103
1104         free_head = old_free_head;
1105
1106         /* init header descriptor */
1107         inflight_info->desc[old_free_head].num = 0;
1108         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1109         inflight_info->desc[old_free_head].inflight = 1;
1110
1111         /* save desc entry in flight entry */
1112         while (head != ((last + 1) % vq->size)) {
1113                 inflight_info->desc[old_free_head].num++;
1114                 inflight_info->desc[free_head].addr = desc[head].addr;
1115                 inflight_info->desc[free_head].len = desc[head].len;
1116                 inflight_info->desc[free_head].flags = desc[head].flags;
1117                 inflight_info->desc[free_head].id = desc[head].id;
1118
1119                 inflight_info->desc[old_free_head].last = free_head;
1120                 free_head = inflight_info->desc[free_head].next;
1121                 inflight_info->free_head = free_head;
1122                 head = (head + 1) % vq->size;
1123         }
1124
1125         inflight_info->old_free_head = free_head;
1126         *inflight_entry = old_free_head;
1127
1128         return 0;
1129 }
1130
1131 int
1132 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1133                                   uint16_t last_used_idx, uint16_t idx)
1134 {
1135         struct virtio_net *dev;
1136         struct vhost_virtqueue *vq;
1137
1138         dev = get_device(vid);
1139         if (unlikely(!dev))
1140                 return -1;
1141
1142         if (unlikely(!(dev->protocol_features &
1143             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1144                 return 0;
1145
1146         if (unlikely(vq_is_packed(dev)))
1147                 return -1;
1148
1149         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1150                 return -1;
1151
1152         vq = dev->virtqueue[vring_idx];
1153         if (unlikely(!vq))
1154                 return -1;
1155
1156         if (unlikely(!vq->inflight_split))
1157                 return -1;
1158
1159         if (unlikely(idx >= vq->size))
1160                 return -1;
1161
1162         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1163
1164         vq->inflight_split->desc[idx].inflight = 0;
1165
1166         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1167
1168         vq->inflight_split->used_idx = last_used_idx;
1169         return 0;
1170 }
1171
1172 int
1173 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1174                                    uint16_t head)
1175 {
1176         struct rte_vhost_inflight_info_packed *inflight_info;
1177         struct virtio_net *dev;
1178         struct vhost_virtqueue *vq;
1179
1180         dev = get_device(vid);
1181         if (unlikely(!dev))
1182                 return -1;
1183
1184         if (unlikely(!(dev->protocol_features &
1185             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1186                 return 0;
1187
1188         if (unlikely(!vq_is_packed(dev)))
1189                 return -1;
1190
1191         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1192                 return -1;
1193
1194         vq = dev->virtqueue[vring_idx];
1195         if (unlikely(!vq))
1196                 return -1;
1197
1198         inflight_info = vq->inflight_packed;
1199         if (unlikely(!inflight_info))
1200                 return -1;
1201
1202         if (unlikely(head >= vq->size))
1203                 return -1;
1204
1205         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1206
1207         inflight_info->desc[head].inflight = 0;
1208
1209         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1210
1211         inflight_info->old_free_head = inflight_info->free_head;
1212         inflight_info->old_used_idx = inflight_info->used_idx;
1213         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1214
1215         return 0;
1216 }
1217
1218 int
1219 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1220                                      uint16_t idx)
1221 {
1222         struct virtio_net *dev;
1223         struct vhost_virtqueue *vq;
1224
1225         dev = get_device(vid);
1226         if (unlikely(!dev))
1227                 return -1;
1228
1229         if (unlikely(!(dev->protocol_features &
1230             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1231                 return 0;
1232
1233         if (unlikely(vq_is_packed(dev)))
1234                 return -1;
1235
1236         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1237                 return -1;
1238
1239         vq = dev->virtqueue[vring_idx];
1240         if (unlikely(!vq))
1241                 return -1;
1242
1243         if (unlikely(!vq->inflight_split))
1244                 return -1;
1245
1246         if (unlikely(idx >= vq->size))
1247                 return -1;
1248
1249         vq->inflight_split->last_inflight_io = idx;
1250         return 0;
1251 }
1252
1253 int
1254 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1255                                       uint16_t head)
1256 {
1257         struct rte_vhost_inflight_info_packed *inflight_info;
1258         struct virtio_net *dev;
1259         struct vhost_virtqueue *vq;
1260         uint16_t last;
1261
1262         dev = get_device(vid);
1263         if (unlikely(!dev))
1264                 return -1;
1265
1266         if (unlikely(!(dev->protocol_features &
1267             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1268                 return 0;
1269
1270         if (unlikely(!vq_is_packed(dev)))
1271                 return -1;
1272
1273         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1274                 return -1;
1275
1276         vq = dev->virtqueue[vring_idx];
1277         if (unlikely(!vq))
1278                 return -1;
1279
1280         inflight_info = vq->inflight_packed;
1281         if (unlikely(!inflight_info))
1282                 return -1;
1283
1284         if (unlikely(head >= vq->size))
1285                 return -1;
1286
1287         last = inflight_info->desc[head].last;
1288         if (unlikely(last >= vq->size))
1289                 return -1;
1290
1291         inflight_info->desc[last].next = inflight_info->free_head;
1292         inflight_info->free_head = head;
1293         inflight_info->used_idx += inflight_info->desc[head].num;
1294         if (inflight_info->used_idx >= inflight_info->desc_num) {
1295                 inflight_info->used_idx -= inflight_info->desc_num;
1296                 inflight_info->used_wrap_counter =
1297                         !inflight_info->used_wrap_counter;
1298         }
1299
1300         return 0;
1301 }
1302
1303 int
1304 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1305 {
1306         struct virtio_net *dev;
1307         struct vhost_virtqueue *vq;
1308
1309         dev = get_device(vid);
1310         if (!dev)
1311                 return -1;
1312
1313         if (vring_idx >= VHOST_MAX_VRING)
1314                 return -1;
1315
1316         vq = dev->virtqueue[vring_idx];
1317         if (!vq)
1318                 return -1;
1319
1320         rte_spinlock_lock(&vq->access_lock);
1321
1322         if (vq_is_packed(dev))
1323                 vhost_vring_call_packed(dev, vq);
1324         else
1325                 vhost_vring_call_split(dev, vq);
1326
1327         rte_spinlock_unlock(&vq->access_lock);
1328
1329         return 0;
1330 }
1331
1332 uint16_t
1333 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1334 {
1335         struct virtio_net *dev;
1336         struct vhost_virtqueue *vq;
1337         uint16_t ret = 0;
1338
1339         dev = get_device(vid);
1340         if (!dev)
1341                 return 0;
1342
1343         if (queue_id >= VHOST_MAX_VRING)
1344                 return 0;
1345
1346         vq = dev->virtqueue[queue_id];
1347         if (!vq)
1348                 return 0;
1349
1350         rte_spinlock_lock(&vq->access_lock);
1351
1352         if (unlikely(!vq->enabled || vq->avail == NULL))
1353                 goto out;
1354
1355         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1356
1357 out:
1358         rte_spinlock_unlock(&vq->access_lock);
1359         return ret;
1360 }
1361
1362 static inline int
1363 vhost_enable_notify_split(struct virtio_net *dev,
1364                 struct vhost_virtqueue *vq, int enable)
1365 {
1366         if (vq->used == NULL)
1367                 return -1;
1368
1369         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1370                 if (enable)
1371                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1372                 else
1373                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1374         } else {
1375                 if (enable)
1376                         vhost_avail_event(vq) = vq->last_avail_idx;
1377         }
1378         return 0;
1379 }
1380
1381 static inline int
1382 vhost_enable_notify_packed(struct virtio_net *dev,
1383                 struct vhost_virtqueue *vq, int enable)
1384 {
1385         uint16_t flags;
1386
1387         if (vq->device_event == NULL)
1388                 return -1;
1389
1390         if (!enable) {
1391                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1392                 return 0;
1393         }
1394
1395         flags = VRING_EVENT_F_ENABLE;
1396         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1397                 flags = VRING_EVENT_F_DESC;
1398                 vq->device_event->off_wrap = vq->last_avail_idx |
1399                         vq->avail_wrap_counter << 15;
1400         }
1401
1402         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1403
1404         vq->device_event->flags = flags;
1405         return 0;
1406 }
1407
1408 int
1409 vhost_enable_guest_notification(struct virtio_net *dev,
1410                 struct vhost_virtqueue *vq, int enable)
1411 {
1412         /*
1413          * If the virtqueue is not ready yet, the change will be
1414          * applied when it becomes ready.
1415          */
1416         if (!vq->ready)
1417                 return 0;
1418
1419         if (vq_is_packed(dev))
1420                 return vhost_enable_notify_packed(dev, vq, enable);
1421         else
1422                 return vhost_enable_notify_split(dev, vq, enable);
1423 }
1424
1425 int
1426 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1427 {
1428         struct virtio_net *dev = get_device(vid);
1429         struct vhost_virtqueue *vq;
1430         int ret;
1431
1432         if (!dev)
1433                 return -1;
1434
1435         if (queue_id >= VHOST_MAX_VRING)
1436                 return -1;
1437
1438         vq = dev->virtqueue[queue_id];
1439         if (!vq)
1440                 return -1;
1441
1442         rte_spinlock_lock(&vq->access_lock);
1443
1444         vq->notif_enable = enable;
1445         ret = vhost_enable_guest_notification(dev, vq, enable);
1446
1447         rte_spinlock_unlock(&vq->access_lock);
1448
1449         return ret;
1450 }
1451
1452 void
1453 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1454 {
1455         struct virtio_net *dev = get_device(vid);
1456
1457         if (dev == NULL)
1458                 return;
1459
1460         vhost_log_write(dev, addr, len);
1461 }
1462
1463 void
1464 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1465                          uint64_t offset, uint64_t len)
1466 {
1467         struct virtio_net *dev;
1468         struct vhost_virtqueue *vq;
1469
1470         dev = get_device(vid);
1471         if (dev == NULL)
1472                 return;
1473
1474         if (vring_idx >= VHOST_MAX_VRING)
1475                 return;
1476         vq = dev->virtqueue[vring_idx];
1477         if (!vq)
1478                 return;
1479
1480         vhost_log_used_vring(dev, vq, offset, len);
1481 }
1482
1483 uint32_t
1484 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1485 {
1486         struct virtio_net *dev;
1487         struct vhost_virtqueue *vq;
1488         uint32_t ret = 0;
1489
1490         dev = get_device(vid);
1491         if (dev == NULL)
1492                 return 0;
1493
1494         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1495                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1496                         dev->ifname, __func__, qid);
1497                 return 0;
1498         }
1499
1500         vq = dev->virtqueue[qid];
1501         if (vq == NULL)
1502                 return 0;
1503
1504         rte_spinlock_lock(&vq->access_lock);
1505
1506         if (unlikely(!vq->enabled || vq->avail == NULL))
1507                 goto out;
1508
1509         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1510
1511 out:
1512         rte_spinlock_unlock(&vq->access_lock);
1513         return ret;
1514 }
1515
1516 struct rte_vdpa_device *
1517 rte_vhost_get_vdpa_device(int vid)
1518 {
1519         struct virtio_net *dev = get_device(vid);
1520
1521         if (dev == NULL)
1522                 return NULL;
1523
1524         return dev->vdpa_dev;
1525 }
1526
1527 int
1528 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1529                 uint64_t *log_size)
1530 {
1531         struct virtio_net *dev = get_device(vid);
1532
1533         if (dev == NULL || log_base == NULL || log_size == NULL)
1534                 return -1;
1535
1536         *log_base = dev->log_base;
1537         *log_size = dev->log_size;
1538
1539         return 0;
1540 }
1541
1542 int
1543 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1544                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1545 {
1546         struct vhost_virtqueue *vq;
1547         struct virtio_net *dev = get_device(vid);
1548
1549         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1550                 return -1;
1551
1552         if (queue_id >= VHOST_MAX_VRING)
1553                 return -1;
1554
1555         vq = dev->virtqueue[queue_id];
1556         if (!vq)
1557                 return -1;
1558
1559         if (vq_is_packed(dev)) {
1560                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1561                                   vq->last_avail_idx;
1562                 *last_used_idx = (vq->used_wrap_counter << 15) |
1563                                  vq->last_used_idx;
1564         } else {
1565                 *last_avail_idx = vq->last_avail_idx;
1566                 *last_used_idx = vq->last_used_idx;
1567         }
1568
1569         return 0;
1570 }
1571
1572 int
1573 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1574                 uint16_t last_avail_idx, uint16_t last_used_idx)
1575 {
1576         struct vhost_virtqueue *vq;
1577         struct virtio_net *dev = get_device(vid);
1578
1579         if (!dev)
1580                 return -1;
1581
1582         if (queue_id >= VHOST_MAX_VRING)
1583                 return -1;
1584
1585         vq = dev->virtqueue[queue_id];
1586         if (!vq)
1587                 return -1;
1588
1589         if (vq_is_packed(dev)) {
1590                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1591                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1592                 vq->last_used_idx = last_used_idx & 0x7fff;
1593                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1594         } else {
1595                 vq->last_avail_idx = last_avail_idx;
1596                 vq->last_used_idx = last_used_idx;
1597         }
1598
1599         return 0;
1600 }
1601
1602 int
1603 rte_vhost_get_vring_base_from_inflight(int vid,
1604                                        uint16_t queue_id,
1605                                        uint16_t *last_avail_idx,
1606                                        uint16_t *last_used_idx)
1607 {
1608         struct rte_vhost_inflight_info_packed *inflight_info;
1609         struct vhost_virtqueue *vq;
1610         struct virtio_net *dev = get_device(vid);
1611
1612         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1613                 return -1;
1614
1615         if (queue_id >= VHOST_MAX_VRING)
1616                 return -1;
1617
1618         vq = dev->virtqueue[queue_id];
1619         if (!vq)
1620                 return -1;
1621
1622         if (!vq_is_packed(dev))
1623                 return -1;
1624
1625         inflight_info = vq->inflight_packed;
1626         if (!inflight_info)
1627                 return -1;
1628
1629         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1630                           inflight_info->old_used_idx;
1631         *last_used_idx = *last_avail_idx;
1632
1633         return 0;
1634 }
1635
1636 int
1637 rte_vhost_extern_callback_register(int vid,
1638                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1639 {
1640         struct virtio_net *dev = get_device(vid);
1641
1642         if (dev == NULL || ops == NULL)
1643                 return -1;
1644
1645         dev->extern_ops = *ops;
1646         dev->extern_data = ctx;
1647         return 0;
1648 }
1649
1650 static __rte_always_inline int
1651 async_channel_register(int vid, uint16_t queue_id)
1652 {
1653         struct virtio_net *dev = get_device(vid);
1654         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1655         struct vhost_async *async;
1656         int node = vq->numa_node;
1657
1658         if (unlikely(vq->async)) {
1659                 VHOST_LOG_CONFIG(ERR,
1660                                 "(%s) async register failed: already registered (qid: %d)\n",
1661                                 dev->ifname, queue_id);
1662                 return -1;
1663         }
1664
1665         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1666         if (!async) {
1667                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1668                                 dev->ifname, queue_id);
1669                 return -1;
1670         }
1671
1672         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1673                         RTE_CACHE_LINE_SIZE, node);
1674         if (!async->pkts_info) {
1675                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1676                                 dev->ifname, queue_id);
1677                 goto out_free_async;
1678         }
1679
1680         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1681                         RTE_CACHE_LINE_SIZE, node);
1682         if (!async->pkts_cmpl_flag) {
1683                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1684                                 dev->ifname, queue_id);
1685                 goto out_free_async;
1686         }
1687
1688         if (vq_is_packed(dev)) {
1689                 async->buffers_packed = rte_malloc_socket(NULL,
1690                                 vq->size * sizeof(struct vring_used_elem_packed),
1691                                 RTE_CACHE_LINE_SIZE, node);
1692                 if (!async->buffers_packed) {
1693                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1694                                         dev->ifname, queue_id);
1695                         goto out_free_inflight;
1696                 }
1697         } else {
1698                 async->descs_split = rte_malloc_socket(NULL,
1699                                 vq->size * sizeof(struct vring_used_elem),
1700                                 RTE_CACHE_LINE_SIZE, node);
1701                 if (!async->descs_split) {
1702                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1703                                         dev->ifname, queue_id);
1704                         goto out_free_inflight;
1705                 }
1706         }
1707
1708         vq->async = async;
1709
1710         return 0;
1711 out_free_inflight:
1712         rte_free(async->pkts_info);
1713 out_free_async:
1714         rte_free(async);
1715
1716         return -1;
1717 }
1718
1719 int
1720 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1721 {
1722         struct vhost_virtqueue *vq;
1723         struct virtio_net *dev = get_device(vid);
1724         int ret;
1725
1726         if (dev == NULL)
1727                 return -1;
1728
1729         if (queue_id >= VHOST_MAX_VRING)
1730                 return -1;
1731
1732         vq = dev->virtqueue[queue_id];
1733
1734         if (unlikely(vq == NULL || !dev->async_copy))
1735                 return -1;
1736
1737         rte_spinlock_lock(&vq->access_lock);
1738         ret = async_channel_register(vid, queue_id);
1739         rte_spinlock_unlock(&vq->access_lock);
1740
1741         return ret;
1742 }
1743
1744 int
1745 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1746 {
1747         struct vhost_virtqueue *vq;
1748         struct virtio_net *dev = get_device(vid);
1749
1750         if (dev == NULL)
1751                 return -1;
1752
1753         if (queue_id >= VHOST_MAX_VRING)
1754                 return -1;
1755
1756         vq = dev->virtqueue[queue_id];
1757
1758         if (unlikely(vq == NULL || !dev->async_copy))
1759                 return -1;
1760
1761         return async_channel_register(vid, queue_id);
1762 }
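
/*
 * The *_thread_unsafe variant above takes no vq->access_lock, so it is only
 * meant for contexts that already have exclusive access to the virtqueue --
 * typically vhost callback context (e.g. a new_device or vring_state_changed
 * handler), where the data path cannot run concurrently on this queue.
 */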
1763
1764 int
1765 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1766 {
1767         struct vhost_virtqueue *vq;
1768         struct virtio_net *dev = get_device(vid);
1769         int ret = -1;
1770
1771         if (dev == NULL)
1772                 return ret;
1773
1774         if (queue_id >= VHOST_MAX_VRING)
1775                 return ret;
1776
1777         vq = dev->virtqueue[queue_id];
1778
1779         if (vq == NULL)
1780                 return ret;
1781
1782         if (!rte_spinlock_trylock(&vq->access_lock)) {
1783                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1784                                 dev->ifname);
1785                 return ret;
1786         }
1787
1788         if (!vq->async) {
1789                 ret = 0;
1790         } else if (vq->async->pkts_inflight_n) {
1791                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1792                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1793                         dev->ifname);
1794         } else {
1795                 vhost_free_async_mem(vq);
1796                 ret = 0;
1797         }
1798
1799         rte_spinlock_unlock(&vq->access_lock);
1800
1801         return ret;
1802 }
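
/*
 * Unregistration uses rte_spinlock_trylock() rather than a blocking lock: if
 * the data path currently holds vq->access_lock, the call returns -1 and
 * logs "virtqueue busy" instead of stalling the control thread.  The caller
 * is expected to quiesce enqueue/dequeue on the queue and complete all
 * in-flight DMA transfers (pkts_inflight_n must reach zero) before retrying.
 */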
1803
1804 int
1805 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1806 {
1807         struct vhost_virtqueue *vq;
1808         struct virtio_net *dev = get_device(vid);
1809
1810         if (dev == NULL)
1811                 return -1;
1812
1813         if (queue_id >= VHOST_MAX_VRING)
1814                 return -1;
1815
1816         vq = dev->virtqueue[queue_id];
1817
1818         if (vq == NULL)
1819                 return -1;
1820
1821         if (!vq->async)
1822                 return 0;
1823
1824         if (vq->async->pkts_inflight_n) {
1825                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1826                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1827                         dev->ifname);
1828                 return -1;
1829         }
1830
1831         vhost_free_async_mem(vq);
1832
1833         return 0;
1834 }
1835
1836 int
1837 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1838 {
1839         struct rte_dma_info info;
1840         void *pkts_cmpl_flag_addr;
1841         uint16_t max_desc;
1842
1843         if (!rte_dma_is_valid(dma_id)) {
1844                 VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
1845                 return -1;
1846         }
1847
1848         rte_dma_info_get(dma_id, &info);
1849         if (vchan_id >= info.max_vchans) {
1850                 VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1851                 return -1;
1852         }
1853
1854         if (!dma_copy_track[dma_id].vchans) {
1855                 struct async_dma_vchan_info *vchans;
1856
1857                 vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1858                                 RTE_CACHE_LINE_SIZE);
1859                 if (vchans == NULL) {
1860                         VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
1861                                         dma_id, vchan_id);
1862                         return -1;
1863                 }
1864
1865                 dma_copy_track[dma_id].vchans = vchans;
1866         }
1867
1868         if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1869                 VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
1870                                 vchan_id);
1871                 return 0;
1872         }
1873
1874         max_desc = info.max_desc;
1875         if (!rte_is_power_of_2(max_desc))
1876                 max_desc = rte_align32pow2(max_desc);
1877
1878         pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1879         if (!pkts_cmpl_flag_addr) {
1880                 VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
1881                                 "vChannel %u.\n", dma_id, vchan_id);
1882
1883                 if (dma_copy_track[dma_id].nr_vchans == 0) {
1884                         rte_free(dma_copy_track[dma_id].vchans);
1885                         dma_copy_track[dma_id].vchans = NULL;
1886                 }
1887                 return -1;
1888         }
1889
1890         dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1891         dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1892         dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1893         dma_copy_track[dma_id].nr_vchans++;
1894
1895         return 0;
1896 }
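
/*
 * Illustrative setup sequence, not part of this file: the DMA device must be
 * configured and started through the dmadev API before being handed to
 * vhost.  A rough sketch assuming a single vchannel and default parameters:
 *
 *	struct rte_dma_conf dev_conf = { .nb_vchans = 1 };
 *	struct rte_dma_vchan_conf qconf = {
 *		.direction = RTE_DMA_DIR_MEM_TO_MEM,
 *		.nb_desc = 1024,
 *	};
 *
 *	if (rte_dma_configure(dma_id, &dev_conf) != 0 ||
 *			rte_dma_vchan_setup(dma_id, 0, &qconf) != 0 ||
 *			rte_dma_start(dma_id) != 0)
 *		rte_exit(EXIT_FAILURE, "dmadev %d setup failed\n", dma_id);
 *
 *	if (rte_vhost_async_dma_configure(dma_id, 0) < 0)
 *		rte_exit(EXIT_FAILURE, "vhost DMA %d config failed\n", dma_id);
 *
 * max_desc is rounded up to a power of two above so that ring_mask can be
 * used for cheap index wrapping in the data path.
 */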
1897
1898 int
1899 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1900 {
1901         struct vhost_virtqueue *vq;
1902         struct virtio_net *dev = get_device(vid);
1903         int ret = -1;
1904
1905         if (dev == NULL)
1906                 return ret;
1907
1908         if (queue_id >= VHOST_MAX_VRING)
1909                 return ret;
1910
1911         vq = dev->virtqueue[queue_id];
1912
1913         if (vq == NULL)
1914                 return ret;
1915
1916         if (!rte_spinlock_trylock(&vq->access_lock)) {
1917                 VHOST_LOG_CONFIG(DEBUG,
1918                         "(%s) failed to check in-flight packets. virtqueue busy.\n",
1919                         dev->ifname);
1920                 return ret;
1921         }
1922
1923         if (vq->async)
1924                 ret = vq->async->pkts_inflight_n;
1925
1926         rte_spinlock_unlock(&vq->access_lock);
1927
1928         return ret;
1929 }
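
/*
 * Typical drain-before-unregister pattern (illustrative only): poll for DMA
 * completions until nothing remains in flight, then unregister the channel.
 * rte_vhost_poll_enqueue_completed() is the async API expected to retire
 * completed enqueues; pkts, dma_id and the retry policy are application
 * specific.
 *
 *	while (rte_vhost_async_get_inflight(vid, qid) > 0) {
 *		uint16_t n = rte_vhost_poll_enqueue_completed(vid, qid,
 *				pkts, RTE_DIM(pkts), dma_id, 0);
 *		rte_pktmbuf_free_bulk(pkts, n);
 *	}
 *	rte_vhost_async_channel_unregister(vid, qid);
 */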
1930
1931 int
1932 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
1933 {
1934         struct vhost_virtqueue *vq;
1935         struct virtio_net *dev = get_device(vid);
1936         int ret = -1;
1937
1938         if (dev == NULL)
1939                 return ret;
1940
1941         if (queue_id >= VHOST_MAX_VRING)
1942                 return ret;
1943
1944         vq = dev->virtqueue[queue_id];
1945
1946         if (vq == NULL)
1947                 return ret;
1948
1949         if (!vq->async)
1950                 return ret;
1951
1952         ret = vq->async->pkts_inflight_n;
1953
1954         return ret;
1955 }
1956
1957 int
1958 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1959                 struct rte_vhost_power_monitor_cond *pmc)
1960 {
1961         struct virtio_net *dev = get_device(vid);
1962         struct vhost_virtqueue *vq;
1963
1964         if (dev == NULL)
1965                 return -1;
1966         if (queue_id >= VHOST_MAX_VRING)
1967                 return -1;
1968
1969         vq = dev->virtqueue[queue_id];
1970         if (vq == NULL)
1971                 return -1;
1972
1973         if (vq_is_packed(dev)) {
1974                 struct vring_packed_desc *desc;
1975                 desc = vq->desc_packed;
1976                 pmc->addr = &desc[vq->last_avail_idx].flags;
1977                 if (vq->avail_wrap_counter)
1978                         pmc->val = VRING_DESC_F_AVAIL;
1979                 else
1980                         pmc->val = VRING_DESC_F_USED;
1981                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1982                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1983                 pmc->match = 1;
1984         } else {
1985                 pmc->addr = &vq->avail->idx;
1986                 pmc->val = vq->last_avail_idx & (vq->size - 1);
1987                 pmc->mask = vq->size - 1;
1988                 pmc->size = sizeof(vq->avail->idx);
1989                 pmc->match = 0;
1990         }
1991
1992         return 0;
1993 }
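
/*
 * The power-monitor condition filled in above mirrors how buffer
 * availability is detected on each ring layout: packed rings wait for the
 * AVAIL/USED flag bits of the next descriptor to match the current wrap
 * counter, while split rings wait for avail->idx to move past
 * last_avail_idx (compared modulo the ring size).  This is what allows a
 * polling core to sleep on an idle virtqueue through rte_power_monitor()
 * instead of busy spinning.
 */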
1994
1996 int
1997 rte_vhost_vring_stats_get_names(int vid, uint16_t queue_id,
1998                 struct rte_vhost_stat_name *name, unsigned int size)
1999 {
2000         struct virtio_net *dev = get_device(vid);
2001         unsigned int i;
2002
2003         if (dev == NULL)
2004                 return -1;
2005
2006         if (queue_id >= dev->nr_vring)
2007                 return -1;
2008
2009         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2010                 return -1;
2011
2012         if (name == NULL || size < VHOST_NB_VQ_STATS)
2013                 return VHOST_NB_VQ_STATS;
2014
2015         for (i = 0; i < VHOST_NB_VQ_STATS; i++)
2016                 snprintf(name[i].name, sizeof(name[i].name), "%s_q%u_%s",
2017                                 (queue_id & 1) ? "rx" : "tx",
2018                                 queue_id / 2, vhost_vq_stat_strings[i].name);
2019
2020         return VHOST_NB_VQ_STATS;
2021 }
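
/*
 * Name layout: each counter is exposed as "<dir>_q<n>_<stat>", where the
 * direction is from the driver's point of view (odd virtqueue ids carry
 * guest TX traffic, i.e. host rx; even ids are host tx) and <n> is the
 * queue pair index (queue_id / 2).  Calling with a NULL array or an
 * undersized one only returns the required number of entries, following the
 * usual two-step xstats retrieval pattern.
 */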
2022
2023 int
2024 rte_vhost_vring_stats_get(int vid, uint16_t queue_id,
2025                 struct rte_vhost_stat *stats, unsigned int n)
2026 {
2027         struct virtio_net *dev = get_device(vid);
2028         struct vhost_virtqueue *vq;
2029         unsigned int i;
2030
2031         if (dev == NULL)
2032                 return -1;
2033
2034         if (queue_id >= dev->nr_vring)
2035                 return -1;
2036
2037         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2038                 return -1;
2039
2040         if (stats == NULL || n < VHOST_NB_VQ_STATS)
2041                 return VHOST_NB_VQ_STATS;
2042
2043         vq = dev->virtqueue[queue_id];
2044
2045         rte_spinlock_lock(&vq->access_lock);
2046         for (i = 0; i < VHOST_NB_VQ_STATS; i++) {
2047                 stats[i].value =
2048                         *(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset);
2049                 stats[i].id = i;
2050         }
2051         rte_spinlock_unlock(&vq->access_lock);
2052
2053         return VHOST_NB_VQ_STATS;
2054 }
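
/*
 * Illustrative retrieval loop, not part of this file, following the
 * two-step pattern above.  It assumes statistics were enabled when the
 * vhost socket was registered (the internal VIRTIO_DEV_STATS_ENABLED flag
 * checked above); error handling is trimmed for brevity.
 *
 *	int i, nb = rte_vhost_vring_stats_get_names(vid, qid, NULL, 0);
 *	struct rte_vhost_stat_name *names = calloc(nb, sizeof(*names));
 *	struct rte_vhost_stat *stats = calloc(nb, sizeof(*stats));
 *
 *	rte_vhost_vring_stats_get_names(vid, qid, names, nb);
 *	rte_vhost_vring_stats_get(vid, qid, stats, nb);
 *	for (i = 0; i < nb; i++)
 *		printf("%s: %" PRIu64 "\n",
 *			names[stats[i].id].name, stats[i].value);
 *
 * rte_vhost_vring_stats_reset() below zeroes the same counters under the
 * per-virtqueue lock.
 */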
2055
2056 int rte_vhost_vring_stats_reset(int vid, uint16_t queue_id)
2057 {
2058         struct virtio_net *dev = get_device(vid);
2059         struct vhost_virtqueue *vq;
2060
2061         if (dev == NULL)
2062                 return -1;
2063
2064         if (queue_id >= dev->nr_vring)
2065                 return -1;
2066
2067         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2068                 return -1;
2069
2070         vq = dev->virtqueue[queue_id];
2071
2072         rte_spinlock_lock(&vq->access_lock);
2073         memset(&vq->stats, 0, sizeof(vq->stats));
2074         rte_spinlock_unlock(&vq->access_lock);
2075
2076         return 0;
2077 }
2078
2079 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
2080 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);