vhost: add statistics for IOTLB
[dpdk.git] lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
24 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
25 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
27 struct vhost_vq_stats_name_off {
28         char name[RTE_VHOST_STATS_NAME_SIZE];
29         unsigned int offset;
30 };
31
32 static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
33         {"good_packets",           offsetof(struct vhost_virtqueue, stats.packets)},
34         {"good_bytes",             offsetof(struct vhost_virtqueue, stats.bytes)},
35         {"multicast_packets",      offsetof(struct vhost_virtqueue, stats.multicast)},
36         {"broadcast_packets",      offsetof(struct vhost_virtqueue, stats.broadcast)},
37         {"undersize_packets",      offsetof(struct vhost_virtqueue, stats.size_bins[0])},
38         {"size_64_packets",        offsetof(struct vhost_virtqueue, stats.size_bins[1])},
39         {"size_65_127_packets",    offsetof(struct vhost_virtqueue, stats.size_bins[2])},
40         {"size_128_255_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[3])},
41         {"size_256_511_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[4])},
42         {"size_512_1023_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[5])},
43         {"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
44         {"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
45         {"guest_notifications",    offsetof(struct vhost_virtqueue, stats.guest_notifications)},
46         {"iotlb_hits",             offsetof(struct vhost_virtqueue, stats.iotlb_hits)},
47         {"iotlb_misses",           offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
48 };
49
50 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
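/*
 * Illustrative sketch (not part of the original file): reading the
 * per-virtqueue counters listed above, including the new iotlb_hits and
 * iotlb_misses entries, from an application. It assumes the public
 * rte_vhost_vring_stats_get_names()/rte_vhost_vring_stats_get() API and
 * that statistics were enabled when registering the vhost port (e.g.
 * with the RTE_VHOST_USER_NET_STATS_ENABLE flag).
 *
 *	int n = rte_vhost_vring_stats_get_names(vid, queue_id, NULL, 0);
 *	struct rte_vhost_stat_name *names = calloc(n, sizeof(*names));
 *	struct rte_vhost_stat *stats = calloc(n, sizeof(*stats));
 *
 *	rte_vhost_vring_stats_get_names(vid, queue_id, names, n);
 *	n = rte_vhost_vring_stats_get(vid, queue_id, stats, n);
 *	for (int i = 0; i < n; i++)
 *		printf("%s: %" PRIu64 "\n", names[stats[i].id].name, stats[i].value);
 */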
51
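/*
 * Translate a guest I/O virtual address to a host virtual address using
 * the per-virtqueue IOTLB cache. On a full-size hit the translated
 * address is returned; otherwise an IOTLB miss request is sent to the
 * front-end for the uncovered part and 0 is returned. The new iotlb_hits
 * and iotlb_misses counters are only updated when statistics are enabled
 * for the device (VIRTIO_DEV_STATS_ENABLED), so the fast path otherwise
 * only pays for a flag test.
 */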
52 /* Called with iotlb_lock read-locked */
53 uint64_t
54 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
55                     uint64_t iova, uint64_t *size, uint8_t perm)
56 {
57         uint64_t vva, tmp_size;
58
59         if (unlikely(!*size))
60                 return 0;
61
62         tmp_size = *size;
63
64         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
65         if (tmp_size == *size) {
66                 if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
67                         vq->stats.iotlb_hits++;
68                 return vva;
69         }
70
71         if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
72                 vq->stats.iotlb_misses++;
73
74         iova += tmp_size;
75
76         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
77                 /*
78                  * iotlb_lock is read-locked for a full burst,
79                  * but it only protects the iotlb cache.
80                  * In case of IOTLB miss, we might block on the socket,
81                  * which could cause a deadlock with QEMU if an IOTLB update
82                  * is being handled. We can safely unlock here to avoid it.
83                  */
84                 vhost_user_iotlb_rd_unlock(vq);
85
86                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
87                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
88                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
89                                 dev->ifname, iova);
90                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
91                 }
92
93                 vhost_user_iotlb_rd_lock(vq);
94         }
95
96         return 0;
97 }
98
99 #define VHOST_LOG_PAGE  4096
100
101 /*
102  * Atomically set a bit in memory.
103  */
104 static __rte_always_inline void
105 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
106 {
107 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
108         /*
109          * '__sync_' builtins are deprecated, but the '__atomic_' ones
110          * generate less efficient code with older GCC versions.
111          */
112         __sync_fetch_and_or_1(addr, (1U << nr));
113 #else
114         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
115 #endif
116 }
117
118 static __rte_always_inline void
119 vhost_log_page(uint8_t *log_base, uint64_t page)
120 {
121         vhost_set_bit(page % 8, &log_base[page / 8]);
122 }
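/*
 * Dirty page logging uses one bit per 4 KiB guest page: page number
 * addr / VHOST_LOG_PAGE sets bit (page % 8) of byte log_base[page / 8].
 * For example, a write to GPA 0x12345 touches page 0x12, i.e. bit 2 of
 * log_base[2].
 */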
123
124 void
125 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
126 {
127         uint64_t page;
128
129         if (unlikely(!dev->log_base || !len))
130                 return;
131
132         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
133                 return;
134
135         /* To make sure guest memory updates are committed before logging */
136         rte_atomic_thread_fence(__ATOMIC_RELEASE);
137
138         page = addr / VHOST_LOG_PAGE;
139         while (page * VHOST_LOG_PAGE < addr + len) {
140                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
141                 page += 1;
142         }
143 }
144
145 void
146 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
147                              uint64_t iova, uint64_t len)
148 {
149         uint64_t hva, gpa, map_len;
150         map_len = len;
151
152         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
153         if (map_len != len) {
154                 VHOST_LOG_DATA(ERR,
155                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
156                         dev->ifname, iova);
157                 return;
158         }
159
160         gpa = hva_to_gpa(dev, hva, len);
161         if (gpa)
162                 __vhost_log_write(dev, gpa, len);
163 }
164
165 void
166 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
167 {
168         unsigned long *log_base;
169         int i;
170
171         if (unlikely(!dev->log_base))
172                 return;
173
174         /* No cache, nothing to sync */
175         if (unlikely(!vq->log_cache))
176                 return;
177
178         rte_atomic_thread_fence(__ATOMIC_RELEASE);
179
180         log_base = (unsigned long *)(uintptr_t)dev->log_base;
181
182         for (i = 0; i < vq->log_cache_nb_elem; i++) {
183                 struct log_cache_entry *elem = vq->log_cache + i;
184
185 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
186                 /*
187                  * '__sync' builtins are deprecated, but the '__atomic' ones
188                  * generate less efficient code with older GCC versions.
189                  */
190                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
191 #else
192                 __atomic_fetch_or(log_base + elem->offset, elem->val,
193                                 __ATOMIC_RELAXED);
194 #endif
195         }
196
197         rte_atomic_thread_fence(__ATOMIC_RELEASE);
198
199         vq->log_cache_nb_elem = 0;
200 }
201
202 static __rte_always_inline void
203 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
204                         uint64_t page)
205 {
206         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
207         uint32_t offset = page / (sizeof(unsigned long) << 3);
208         int i;
209
210         if (unlikely(!vq->log_cache)) {
211                 /* No logging cache allocated, write dirty log map directly */
212                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
213                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
214
215                 return;
216         }
217
218         for (i = 0; i < vq->log_cache_nb_elem; i++) {
219                 struct log_cache_entry *elem = vq->log_cache + i;
220
221                 if (elem->offset == offset) {
222                         elem->val |= (1UL << bit_nr);
223                         return;
224                 }
225         }
226
227         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
228                 /*
229                  * No more room for a new log cache entry,
230                  * so write the dirty log map directly.
231                  */
232                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
233                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
234
235                 return;
236         }
237
238         vq->log_cache[i].offset = offset;
239         vq->log_cache[i].val = (1UL << bit_nr);
240         vq->log_cache_nb_elem++;
241 }
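/*
 * Each log cache entry covers one unsigned long worth of bits in the
 * dirty bitmap: with 64-bit longs, that is 64 pages or 256 KiB of guest
 * memory per entry. The cached bits are OR'ed into the shared log by
 * __vhost_log_cache_sync(), amortizing the atomic updates over a full
 * burst.
 */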
242
243 void
244 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
245                         uint64_t addr, uint64_t len)
246 {
247         uint64_t page;
248
249         if (unlikely(!dev->log_base || !len))
250                 return;
251
252         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
253                 return;
254
255         page = addr / VHOST_LOG_PAGE;
256         while (page * VHOST_LOG_PAGE < addr + len) {
257                 vhost_log_cache_page(dev, vq, page);
258                 page += 1;
259         }
260 }
261
262 void
263 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
264                              uint64_t iova, uint64_t len)
265 {
266         uint64_t hva, gpa, map_len;
267         map_len = len;
268
269         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
270         if (map_len != len) {
271                 VHOST_LOG_DATA(ERR,
272                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
273                         dev->ifname, iova);
274                 return;
275         }
276
277         gpa = hva_to_gpa(dev, hva, len);
278         if (gpa)
279                 __vhost_log_cache_write(dev, vq, gpa, len);
280 }
281
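/*
 * Allocate a local copy of an indirect descriptor table. The guest table
 * may span several non-contiguous host mappings, so it is translated and
 * copied chunk by chunk. The caller is responsible for freeing the
 * returned buffer with rte_free().
 */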
282 void *
283 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
284                 uint64_t desc_addr, uint64_t desc_len)
285 {
286         void *idesc;
287         uint64_t src, dst;
288         uint64_t len, remain = desc_len;
289
290         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
291         if (unlikely(!idesc))
292                 return NULL;
293
294         dst = (uint64_t)(uintptr_t)idesc;
295
296         while (remain) {
297                 len = remain;
298                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
299                                 VHOST_ACCESS_RO);
300                 if (unlikely(!src || !len)) {
301                         rte_free(idesc);
302                         return NULL;
303                 }
304
305                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
306
307                 remain -= len;
308                 dst += len;
309                 desc_addr += len;
310         }
311
312         return idesc;
313 }
314
315 void
316 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
317 {
318         if ((vq->callfd >= 0) && (destroy != 0))
319                 close(vq->callfd);
320         if (vq->kickfd >= 0)
321                 close(vq->kickfd);
322 }
323
324 void
325 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
326 {
327         if (!(dev->protocol_features &
328             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
329                 return;
330
331         if (vq_is_packed(dev)) {
332                 if (vq->inflight_packed)
333                         vq->inflight_packed = NULL;
334         } else {
335                 if (vq->inflight_split)
336                         vq->inflight_split = NULL;
337         }
338
339         if (vq->resubmit_inflight) {
340                 if (vq->resubmit_inflight->resubmit_list) {
341                         rte_free(vq->resubmit_inflight->resubmit_list);
342                         vq->resubmit_inflight->resubmit_list = NULL;
343                 }
344                 rte_free(vq->resubmit_inflight);
345                 vq->resubmit_inflight = NULL;
346         }
347 }
348
349 /*
350  * Unmap any memory, close any file descriptors and
351  * free any memory owned by a device.
352  */
353 void
354 cleanup_device(struct virtio_net *dev, int destroy)
355 {
356         uint32_t i;
357
358         vhost_backend_cleanup(dev);
359
360         for (i = 0; i < dev->nr_vring; i++) {
361                 cleanup_vq(dev->virtqueue[i], destroy);
362                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
363         }
364 }
365
366 static void
367 vhost_free_async_mem(struct vhost_virtqueue *vq)
368 {
369         if (!vq->async)
370                 return;
371
372         rte_free(vq->async->pkts_info);
373         rte_free(vq->async->pkts_cmpl_flag);
374
375         rte_free(vq->async->buffers_packed);
376         vq->async->buffers_packed = NULL;
377         rte_free(vq->async->descs_split);
378         vq->async->descs_split = NULL;
379
380         rte_free(vq->async);
381         vq->async = NULL;
382 }
383
384 void
385 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
386 {
387         if (vq_is_packed(dev))
388                 rte_free(vq->shadow_used_packed);
389         else
390                 rte_free(vq->shadow_used_split);
391
392         vhost_free_async_mem(vq);
393         rte_free(vq->batch_copy_elems);
394         rte_mempool_free(vq->iotlb_pool);
395         rte_free(vq->log_cache);
396         rte_free(vq);
397 }
398
399 /*
400  * Release virtqueues and device memory.
401  */
402 static void
403 free_device(struct virtio_net *dev)
404 {
405         uint32_t i;
406
407         for (i = 0; i < dev->nr_vring; i++)
408                 free_vq(dev, dev->virtqueue[i]);
409
410         rte_free(dev);
411 }
412
413 static __rte_always_inline int
414 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
415 {
416         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
417                 return 0;
418
419         vq->log_guest_addr = translate_log_addr(dev, vq,
420                                                 vq->ring_addrs.log_guest_addr);
421         if (vq->log_guest_addr == 0)
422                 return -1;
423
424         return 0;
425 }
426
427 /*
428  * Converts a vring log address to a GPA.
429  * If IOMMU is enabled, the log address is an IOVA.
430  * If IOMMU is not enabled, the log address is already a GPA.
431  *
432  * Caller should have iotlb_lock read-locked
433  */
434 uint64_t
435 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
436                 uint64_t log_addr)
437 {
438         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
439                 const uint64_t exp_size = sizeof(uint64_t);
440                 uint64_t hva, gpa;
441                 uint64_t size = exp_size;
442
443                 hva = vhost_iova_to_vva(dev, vq, log_addr,
444                                         &size, VHOST_ACCESS_RW);
445
446                 if (size != exp_size)
447                         return 0;
448
449                 gpa = hva_to_gpa(dev, hva, exp_size);
450                 if (!gpa) {
451                         VHOST_LOG_DATA(ERR,
452                                 "(%s) failed to find GPA for log_addr: 0x%"
453                                 PRIx64 " hva: 0x%" PRIx64 "\n",
454                                 dev->ifname, log_addr, hva);
455                         return 0;
456                 }
457                 return gpa;
458
459         } else
460                 return log_addr;
461 }
462
463 /* Caller should have iotlb_lock read-locked */
464 static int
465 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
466 {
467         uint64_t req_size, size;
468
469         req_size = sizeof(struct vring_desc) * vq->size;
470         size = req_size;
471         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
472                                                 vq->ring_addrs.desc_user_addr,
473                                                 &size, VHOST_ACCESS_RW);
474         if (!vq->desc || size != req_size)
475                 return -1;
476
477         req_size = sizeof(struct vring_avail);
478         req_size += sizeof(uint16_t) * vq->size;
479         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
480                 req_size += sizeof(uint16_t);
481         size = req_size;
482         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
483                                                 vq->ring_addrs.avail_user_addr,
484                                                 &size, VHOST_ACCESS_RW);
485         if (!vq->avail || size != req_size)
486                 return -1;
487
488         req_size = sizeof(struct vring_used);
489         req_size += sizeof(struct vring_used_elem) * vq->size;
490         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
491                 req_size += sizeof(uint16_t);
492         size = req_size;
493         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
494                                                 vq->ring_addrs.used_user_addr,
495                                                 &size, VHOST_ACCESS_RW);
496         if (!vq->used || size != req_size)
497                 return -1;
498
499         return 0;
500 }
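/*
 * Worked example for a split ring of 256 descriptors with EVENT_IDX
 * negotiated: the descriptor table takes 16 * 256 = 4096 bytes, the
 * avail ring 4 + 2 * 256 + 2 = 518 bytes and the used ring
 * 4 + 8 * 256 + 2 = 2054 bytes. The translation above only succeeds if
 * each area can be mapped in full.
 */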
501
502 /* Caller should have iotlb_lock read-locked */
503 static int
504 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
505 {
506         uint64_t req_size, size;
507
508         req_size = sizeof(struct vring_packed_desc) * vq->size;
509         size = req_size;
510         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
511                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
512                                 &size, VHOST_ACCESS_RW);
513         if (!vq->desc_packed || size != req_size)
514                 return -1;
515
516         req_size = sizeof(struct vring_packed_desc_event);
517         size = req_size;
518         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
519                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
520                                 &size, VHOST_ACCESS_RW);
521         if (!vq->driver_event || size != req_size)
522                 return -1;
523
524         req_size = sizeof(struct vring_packed_desc_event);
525         size = req_size;
526         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
527                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
528                                 &size, VHOST_ACCESS_RW);
529         if (!vq->device_event || size != req_size)
530                 return -1;
531
532         return 0;
533 }
534
535 int
536 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
537 {
538
539         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
540                 return -1;
541
542         if (vq_is_packed(dev)) {
543                 if (vring_translate_packed(dev, vq) < 0)
544                         return -1;
545         } else {
546                 if (vring_translate_split(dev, vq) < 0)
547                         return -1;
548         }
549
550         if (log_translate(dev, vq) < 0)
551                 return -1;
552
553         vq->access_ok = true;
554
555         return 0;
556 }
557
558 void
559 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
560 {
561         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
562                 vhost_user_iotlb_wr_lock(vq);
563
564         vq->access_ok = false;
565         vq->desc = NULL;
566         vq->avail = NULL;
567         vq->used = NULL;
568         vq->log_guest_addr = 0;
569
570         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
571                 vhost_user_iotlb_wr_unlock(vq);
572 }
573
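/*
 * (Re)initialize a virtqueue to its default state: the structure is
 * zeroed, the eventfds are marked uninitialized, and the NUMA node
 * backing the virtqueue allocation is queried (when built with NUMA
 * support) so that later per-queue allocations land on the same socket.
 */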
574 static void
575 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
576 {
577         struct vhost_virtqueue *vq;
578         int numa_node = SOCKET_ID_ANY;
579
580         if (vring_idx >= VHOST_MAX_VRING) {
581                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
582                                 dev->ifname, vring_idx);
583                 return;
584         }
585
586         vq = dev->virtqueue[vring_idx];
587         if (!vq) {
588                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
589                                 dev->ifname, vring_idx);
590                 return;
591         }
592
593         memset(vq, 0, sizeof(struct vhost_virtqueue));
594
595         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
596         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
597         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
598
599 #ifdef RTE_LIBRTE_VHOST_NUMA
600         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
601                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
602                         dev->ifname, rte_strerror(errno));
603                 numa_node = SOCKET_ID_ANY;
604         }
605 #endif
606         vq->numa_node = numa_node;
607
608         vhost_user_iotlb_init(dev, vring_idx);
609 }
610
611 static void
612 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
613 {
614         struct vhost_virtqueue *vq;
615         int callfd;
616
617         if (vring_idx >= VHOST_MAX_VRING) {
618                 VHOST_LOG_CONFIG(ERR,
619                                 "(%s) failed to reset vring, out of bound (%d)\n",
620                                 dev->ifname, vring_idx);
621                 return;
622         }
623
624         vq = dev->virtqueue[vring_idx];
625         if (!vq) {
626                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
627                                 dev->ifname, vring_idx);
628                 return;
629         }
630
631         callfd = vq->callfd;
632         init_vring_queue(dev, vring_idx);
633         vq->callfd = callfd;
634 }
635
636 int
637 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
638 {
639         struct vhost_virtqueue *vq;
640         uint32_t i;
641
642         /* Also allocate holes, if any, up to the requested vring index. */
643         for (i = 0; i <= vring_idx; i++) {
644                 if (dev->virtqueue[i])
645                         continue;
646
647                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
648                 if (vq == NULL) {
649                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
650                                         dev->ifname, i);
651                         return -1;
652                 }
653
654                 dev->virtqueue[i] = vq;
655                 init_vring_queue(dev, i);
656                 rte_spinlock_init(&vq->access_lock);
657                 vq->avail_wrap_counter = 1;
658                 vq->used_wrap_counter = 1;
659                 vq->signalled_used_valid = false;
660         }
661
662         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
663
664         return 0;
665 }
666
667 /*
668  * Reset some variables in the device structure, while keeping a few
669  * others untouched, such as vid, ifname and nr_vring: they should
670  * remain the same unless the device is removed.
671  */
672 void
673 reset_device(struct virtio_net *dev)
674 {
675         uint32_t i;
676
677         dev->features = 0;
678         dev->protocol_features = 0;
679         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
680
681         for (i = 0; i < dev->nr_vring; i++)
682                 reset_vring_queue(dev, i);
683 }
684
685 /*
686  * Invoked when a new vhost-user connection is established (i.e. when
687  * a new virtio device is being attached).
688  */
689 int
690 vhost_new_device(void)
691 {
692         struct virtio_net *dev;
693         int i;
694
695         pthread_mutex_lock(&vhost_dev_lock);
696         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
697                 if (vhost_devices[i] == NULL)
698                         break;
699         }
700
701         if (i == RTE_MAX_VHOST_DEVICE) {
702                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
703                 pthread_mutex_unlock(&vhost_dev_lock);
704                 return -1;
705         }
706
707         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
708         if (dev == NULL) {
709                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
710                 pthread_mutex_unlock(&vhost_dev_lock);
711                 return -1;
712         }
713
714         vhost_devices[i] = dev;
715         pthread_mutex_unlock(&vhost_dev_lock);
716
717         dev->vid = i;
718         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
719         dev->slave_req_fd = -1;
720         dev->postcopy_ufd = -1;
721         rte_spinlock_init(&dev->slave_req_lock);
722
723         return i;
724 }
725
726 void
727 vhost_destroy_device_notify(struct virtio_net *dev)
728 {
729         struct rte_vdpa_device *vdpa_dev;
730
731         if (dev->flags & VIRTIO_DEV_RUNNING) {
732                 vdpa_dev = dev->vdpa_dev;
733                 if (vdpa_dev)
734                         vdpa_dev->ops->dev_close(dev->vid);
735                 dev->flags &= ~VIRTIO_DEV_RUNNING;
736                 dev->notify_ops->destroy_device(dev->vid);
737         }
738 }
739
740 /*
741  * Invoked when the vhost-user connection is broken (i.e. when
742  * the virtio device is being detached).
743  */
744 void
745 vhost_destroy_device(int vid)
746 {
747         struct virtio_net *dev = get_device(vid);
748
749         if (dev == NULL)
750                 return;
751
752         vhost_destroy_device_notify(dev);
753
754         cleanup_device(dev, 1);
755         free_device(dev);
756
757         vhost_devices[vid] = NULL;
758 }
759
760 void
761 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
762 {
763         struct virtio_net *dev = get_device(vid);
764
765         if (dev == NULL)
766                 return;
767
768         dev->vdpa_dev = vdpa_dev;
769 }
770
771 void
772 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
773 {
774         struct virtio_net *dev;
775         unsigned int len;
776
777         dev = get_device(vid);
778         if (dev == NULL)
779                 return;
780
781         len = if_len > sizeof(dev->ifname) ?
782                 sizeof(dev->ifname) : if_len;
783
784         strncpy(dev->ifname, if_name, len);
785         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
786 }
787
788 void
789 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags, bool stats_enabled)
790 {
791         struct virtio_net *dev = get_device(vid);
792
793         if (dev == NULL)
794                 return;
795
796         if (enable)
797                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
798         else
799                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
800         if (!compliant_ol_flags)
801                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
802         else
803                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
804         if (stats_enabled)
805                 dev->flags |= VIRTIO_DEV_STATS_ENABLED;
806         else
807                 dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
808 }
809
810 void
811 vhost_enable_extbuf(int vid)
812 {
813         struct virtio_net *dev = get_device(vid);
814
815         if (dev == NULL)
816                 return;
817
818         dev->extbuf = 1;
819 }
820
821 void
822 vhost_enable_linearbuf(int vid)
823 {
824         struct virtio_net *dev = get_device(vid);
825
826         if (dev == NULL)
827                 return;
828
829         dev->linearbuf = 1;
830 }
831
832 int
833 rte_vhost_get_mtu(int vid, uint16_t *mtu)
834 {
835         struct virtio_net *dev = get_device(vid);
836
837         if (dev == NULL || mtu == NULL)
838                 return -ENODEV;
839
840         if (!(dev->flags & VIRTIO_DEV_READY))
841                 return -EAGAIN;
842
843         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
844                 return -ENOTSUP;
845
846         *mtu = dev->mtu;
847
848         return 0;
849 }
850
851 int
852 rte_vhost_get_numa_node(int vid)
853 {
854 #ifdef RTE_LIBRTE_VHOST_NUMA
855         struct virtio_net *dev = get_device(vid);
856         int numa_node;
857         int ret;
858
859         if (dev == NULL || numa_available() != 0)
860                 return -1;
861
862         ret = get_mempolicy(&numa_node, NULL, 0, dev,
863                             MPOL_F_NODE | MPOL_F_ADDR);
864         if (ret < 0) {
865                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
866                         dev->ifname, rte_strerror(errno));
867                 return -1;
868         }
869
870         return numa_node;
871 #else
872         RTE_SET_USED(vid);
873         return -1;
874 #endif
875 }
876
877 uint32_t
878 rte_vhost_get_queue_num(int vid)
879 {
880         struct virtio_net *dev = get_device(vid);
881
882         if (dev == NULL)
883                 return 0;
884
885         return dev->nr_vring / 2;
886 }
887
888 uint16_t
889 rte_vhost_get_vring_num(int vid)
890 {
891         struct virtio_net *dev = get_device(vid);
892
893         if (dev == NULL)
894                 return 0;
895
896         return dev->nr_vring;
897 }
898
899 int
900 rte_vhost_get_ifname(int vid, char *buf, size_t len)
901 {
902         struct virtio_net *dev = get_device(vid);
903
904         if (dev == NULL || buf == NULL)
905                 return -1;
906
907         len = RTE_MIN(len, sizeof(dev->ifname));
908
909         strncpy(buf, dev->ifname, len);
910         buf[len - 1] = '\0';
911
912         return 0;
913 }
914
915 int
916 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
917 {
918         struct virtio_net *dev;
919
920         dev = get_device(vid);
921         if (dev == NULL || features == NULL)
922                 return -1;
923
924         *features = dev->features;
925         return 0;
926 }
927
928 int
929 rte_vhost_get_negotiated_protocol_features(int vid,
930                                            uint64_t *protocol_features)
931 {
932         struct virtio_net *dev;
933
934         dev = get_device(vid);
935         if (dev == NULL || protocol_features == NULL)
936                 return -1;
937
938         *protocol_features = dev->protocol_features;
939         return 0;
940 }
941
942 int
943 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
944 {
945         struct virtio_net *dev;
946         struct rte_vhost_memory *m;
947         size_t size;
948
949         dev = get_device(vid);
950         if (dev == NULL || mem == NULL)
951                 return -1;
952
953         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
954         m = malloc(sizeof(struct rte_vhost_memory) + size);
955         if (!m)
956                 return -1;
957
958         m->nregions = dev->mem->nregions;
959         memcpy(m->regions, dev->mem->regions, size);
960         *mem = m;
961
962         return 0;
963 }
964
965 int
966 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
967                           struct rte_vhost_vring *vring)
968 {
969         struct virtio_net *dev;
970         struct vhost_virtqueue *vq;
971
972         dev = get_device(vid);
973         if (dev == NULL || vring == NULL)
974                 return -1;
975
976         if (vring_idx >= VHOST_MAX_VRING)
977                 return -1;
978
979         vq = dev->virtqueue[vring_idx];
980         if (!vq)
981                 return -1;
982
983         if (vq_is_packed(dev)) {
984                 vring->desc_packed = vq->desc_packed;
985                 vring->driver_event = vq->driver_event;
986                 vring->device_event = vq->device_event;
987         } else {
988                 vring->desc = vq->desc;
989                 vring->avail = vq->avail;
990                 vring->used = vq->used;
991         }
992         vring->log_guest_addr  = vq->log_guest_addr;
993
994         vring->callfd  = vq->callfd;
995         vring->kickfd  = vq->kickfd;
996         vring->size    = vq->size;
997
998         return 0;
999 }
1000
1001 int
1002 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
1003                                   struct rte_vhost_ring_inflight *vring)
1004 {
1005         struct virtio_net *dev;
1006         struct vhost_virtqueue *vq;
1007
1008         dev = get_device(vid);
1009         if (unlikely(!dev))
1010                 return -1;
1011
1012         if (vring_idx >= VHOST_MAX_VRING)
1013                 return -1;
1014
1015         vq = dev->virtqueue[vring_idx];
1016         if (unlikely(!vq))
1017                 return -1;
1018
1019         if (vq_is_packed(dev)) {
1020                 if (unlikely(!vq->inflight_packed))
1021                         return -1;
1022
1023                 vring->inflight_packed = vq->inflight_packed;
1024         } else {
1025                 if (unlikely(!vq->inflight_split))
1026                         return -1;
1027
1028                 vring->inflight_split = vq->inflight_split;
1029         }
1030
1031         vring->resubmit_inflight = vq->resubmit_inflight;
1032
1033         return 0;
1034 }
1035
1036 int
1037 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1038                                   uint16_t idx)
1039 {
1040         struct vhost_virtqueue *vq;
1041         struct virtio_net *dev;
1042
1043         dev = get_device(vid);
1044         if (unlikely(!dev))
1045                 return -1;
1046
1047         if (unlikely(!(dev->protocol_features &
1048             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1049                 return 0;
1050
1051         if (unlikely(vq_is_packed(dev)))
1052                 return -1;
1053
1054         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1055                 return -1;
1056
1057         vq = dev->virtqueue[vring_idx];
1058         if (unlikely(!vq))
1059                 return -1;
1060
1061         if (unlikely(!vq->inflight_split))
1062                 return -1;
1063
1064         if (unlikely(idx >= vq->size))
1065                 return -1;
1066
1067         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1068         vq->inflight_split->desc[idx].inflight = 1;
1069         return 0;
1070 }
1071
1072 int
1073 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1074                                    uint16_t head, uint16_t last,
1075                                    uint16_t *inflight_entry)
1076 {
1077         struct rte_vhost_inflight_info_packed *inflight_info;
1078         struct virtio_net *dev;
1079         struct vhost_virtqueue *vq;
1080         struct vring_packed_desc *desc;
1081         uint16_t old_free_head, free_head;
1082
1083         dev = get_device(vid);
1084         if (unlikely(!dev))
1085                 return -1;
1086
1087         if (unlikely(!(dev->protocol_features &
1088             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1089                 return 0;
1090
1091         if (unlikely(!vq_is_packed(dev)))
1092                 return -1;
1093
1094         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1095                 return -1;
1096
1097         vq = dev->virtqueue[vring_idx];
1098         if (unlikely(!vq))
1099                 return -1;
1100
1101         inflight_info = vq->inflight_packed;
1102         if (unlikely(!inflight_info))
1103                 return -1;
1104
1105         if (unlikely(head >= vq->size))
1106                 return -1;
1107
1108         desc = vq->desc_packed;
1109         old_free_head = inflight_info->old_free_head;
1110         if (unlikely(old_free_head >= vq->size))
1111                 return -1;
1112
1113         free_head = old_free_head;
1114
1115         /* init header descriptor */
1116         inflight_info->desc[old_free_head].num = 0;
1117         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1118         inflight_info->desc[old_free_head].inflight = 1;
1119
1120         /* save desc entry in flight entry */
1121         while (head != ((last + 1) % vq->size)) {
1122                 inflight_info->desc[old_free_head].num++;
1123                 inflight_info->desc[free_head].addr = desc[head].addr;
1124                 inflight_info->desc[free_head].len = desc[head].len;
1125                 inflight_info->desc[free_head].flags = desc[head].flags;
1126                 inflight_info->desc[free_head].id = desc[head].id;
1127
1128                 inflight_info->desc[old_free_head].last = free_head;
1129                 free_head = inflight_info->desc[free_head].next;
1130                 inflight_info->free_head = free_head;
1131                 head = (head + 1) % vq->size;
1132         }
1133
1134         inflight_info->old_free_head = free_head;
1135         *inflight_entry = old_free_head;
1136
1137         return 0;
1138 }
1139
1140 int
1141 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1142                                   uint16_t last_used_idx, uint16_t idx)
1143 {
1144         struct virtio_net *dev;
1145         struct vhost_virtqueue *vq;
1146
1147         dev = get_device(vid);
1148         if (unlikely(!dev))
1149                 return -1;
1150
1151         if (unlikely(!(dev->protocol_features &
1152             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1153                 return 0;
1154
1155         if (unlikely(vq_is_packed(dev)))
1156                 return -1;
1157
1158         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1159                 return -1;
1160
1161         vq = dev->virtqueue[vring_idx];
1162         if (unlikely(!vq))
1163                 return -1;
1164
1165         if (unlikely(!vq->inflight_split))
1166                 return -1;
1167
1168         if (unlikely(idx >= vq->size))
1169                 return -1;
1170
1171         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1172
1173         vq->inflight_split->desc[idx].inflight = 0;
1174
1175         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1176
1177         vq->inflight_split->used_idx = last_used_idx;
1178         return 0;
1179 }
1180
1181 int
1182 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1183                                    uint16_t head)
1184 {
1185         struct rte_vhost_inflight_info_packed *inflight_info;
1186         struct virtio_net *dev;
1187         struct vhost_virtqueue *vq;
1188
1189         dev = get_device(vid);
1190         if (unlikely(!dev))
1191                 return -1;
1192
1193         if (unlikely(!(dev->protocol_features &
1194             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1195                 return 0;
1196
1197         if (unlikely(!vq_is_packed(dev)))
1198                 return -1;
1199
1200         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1201                 return -1;
1202
1203         vq = dev->virtqueue[vring_idx];
1204         if (unlikely(!vq))
1205                 return -1;
1206
1207         inflight_info = vq->inflight_packed;
1208         if (unlikely(!inflight_info))
1209                 return -1;
1210
1211         if (unlikely(head >= vq->size))
1212                 return -1;
1213
1214         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1215
1216         inflight_info->desc[head].inflight = 0;
1217
1218         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1219
1220         inflight_info->old_free_head = inflight_info->free_head;
1221         inflight_info->old_used_idx = inflight_info->used_idx;
1222         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1223
1224         return 0;
1225 }
1226
1227 int
1228 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1229                                      uint16_t idx)
1230 {
1231         struct virtio_net *dev;
1232         struct vhost_virtqueue *vq;
1233
1234         dev = get_device(vid);
1235         if (unlikely(!dev))
1236                 return -1;
1237
1238         if (unlikely(!(dev->protocol_features &
1239             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1240                 return 0;
1241
1242         if (unlikely(vq_is_packed(dev)))
1243                 return -1;
1244
1245         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1246                 return -1;
1247
1248         vq = dev->virtqueue[vring_idx];
1249         if (unlikely(!vq))
1250                 return -1;
1251
1252         if (unlikely(!vq->inflight_split))
1253                 return -1;
1254
1255         if (unlikely(idx >= vq->size))
1256                 return -1;
1257
1258         vq->inflight_split->last_inflight_io = idx;
1259         return 0;
1260 }
1261
1262 int
1263 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1264                                       uint16_t head)
1265 {
1266         struct rte_vhost_inflight_info_packed *inflight_info;
1267         struct virtio_net *dev;
1268         struct vhost_virtqueue *vq;
1269         uint16_t last;
1270
1271         dev = get_device(vid);
1272         if (unlikely(!dev))
1273                 return -1;
1274
1275         if (unlikely(!(dev->protocol_features &
1276             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1277                 return 0;
1278
1279         if (unlikely(!vq_is_packed(dev)))
1280                 return -1;
1281
1282         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1283                 return -1;
1284
1285         vq = dev->virtqueue[vring_idx];
1286         if (unlikely(!vq))
1287                 return -1;
1288
1289         inflight_info = vq->inflight_packed;
1290         if (unlikely(!inflight_info))
1291                 return -1;
1292
1293         if (unlikely(head >= vq->size))
1294                 return -1;
1295
1296         last = inflight_info->desc[head].last;
1297         if (unlikely(last >= vq->size))
1298                 return -1;
1299
1300         inflight_info->desc[last].next = inflight_info->free_head;
1301         inflight_info->free_head = head;
1302         inflight_info->used_idx += inflight_info->desc[head].num;
1303         if (inflight_info->used_idx >= inflight_info->desc_num) {
1304                 inflight_info->used_idx -= inflight_info->desc_num;
1305                 inflight_info->used_wrap_counter =
1306                         !inflight_info->used_wrap_counter;
1307         }
1308
1309         return 0;
1310 }
1311
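/*
 * Notify the guest that buffers have been used on the given vring.
 * The virtqueue access_lock is taken to serialize with the data path,
 * and the split/packed event suppression rules are handled by the
 * vhost_vring_call_*() helpers.
 */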
1312 int
1313 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1314 {
1315         struct virtio_net *dev;
1316         struct vhost_virtqueue *vq;
1317
1318         dev = get_device(vid);
1319         if (!dev)
1320                 return -1;
1321
1322         if (vring_idx >= VHOST_MAX_VRING)
1323                 return -1;
1324
1325         vq = dev->virtqueue[vring_idx];
1326         if (!vq)
1327                 return -1;
1328
1329         rte_spinlock_lock(&vq->access_lock);
1330
1331         if (vq_is_packed(dev))
1332                 vhost_vring_call_packed(dev, vq);
1333         else
1334                 vhost_vring_call_split(dev, vq);
1335
1336         rte_spinlock_unlock(&vq->access_lock);
1337
1338         return 0;
1339 }
1340
1341 uint16_t
1342 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1343 {
1344         struct virtio_net *dev;
1345         struct vhost_virtqueue *vq;
1346         uint16_t ret = 0;
1347
1348         dev = get_device(vid);
1349         if (!dev)
1350                 return 0;
1351
1352         if (queue_id >= VHOST_MAX_VRING)
1353                 return 0;
1354
1355         vq = dev->virtqueue[queue_id];
1356         if (!vq)
1357                 return 0;
1358
1359         rte_spinlock_lock(&vq->access_lock);
1360
1361         if (unlikely(!vq->enabled || vq->avail == NULL))
1362                 goto out;
1363
1364         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1365
1366 out:
1367         rte_spinlock_unlock(&vq->access_lock);
1368         return ret;
1369 }
1370
1371 static inline int
1372 vhost_enable_notify_split(struct virtio_net *dev,
1373                 struct vhost_virtqueue *vq, int enable)
1374 {
1375         if (vq->used == NULL)
1376                 return -1;
1377
1378         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1379                 if (enable)
1380                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1381                 else
1382                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1383         } else {
1384                 if (enable)
1385                         vhost_avail_event(vq) = vq->last_avail_idx;
1386         }
1387         return 0;
1388 }
1389
1390 static inline int
1391 vhost_enable_notify_packed(struct virtio_net *dev,
1392                 struct vhost_virtqueue *vq, int enable)
1393 {
1394         uint16_t flags;
1395
1396         if (vq->device_event == NULL)
1397                 return -1;
1398
1399         if (!enable) {
1400                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1401                 return 0;
1402         }
1403
1404         flags = VRING_EVENT_F_ENABLE;
1405         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1406                 flags = VRING_EVENT_F_DESC;
1407                 vq->device_event->off_wrap = vq->last_avail_idx |
1408                         vq->avail_wrap_counter << 15;
1409         }
1410
1411         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1412
1413         vq->device_event->flags = flags;
1414         return 0;
1415 }
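/*
 * With VIRTIO_RING_F_EVENT_IDX, off_wrap packs the descriptor event
 * offset in bits 0-14 and the wrap counter in bit 15: for example,
 * last_avail_idx = 10 with avail_wrap_counter = 1 is written as 0x800a,
 * requesting a notification from the driver once that descriptor is
 * made available.
 */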
1416
1417 int
1418 vhost_enable_guest_notification(struct virtio_net *dev,
1419                 struct vhost_virtqueue *vq, int enable)
1420 {
1421         /*
1422          * If the virtqueue is not ready yet, the notification setting
1423          * will be applied once it becomes ready.
1424          */
1425         if (!vq->ready)
1426                 return 0;
1427
1428         if (vq_is_packed(dev))
1429                 return vhost_enable_notify_packed(dev, vq, enable);
1430         else
1431                 return vhost_enable_notify_split(dev, vq, enable);
1432 }
1433
1434 int
1435 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1436 {
1437         struct virtio_net *dev = get_device(vid);
1438         struct vhost_virtqueue *vq;
1439         int ret;
1440
1441         if (!dev)
1442                 return -1;
1443
1444         if (queue_id >= VHOST_MAX_VRING)
1445                 return -1;
1446
1447         vq = dev->virtqueue[queue_id];
1448         if (!vq)
1449                 return -1;
1450
1451         rte_spinlock_lock(&vq->access_lock);
1452
1453         vq->notif_enable = enable;
1454         ret = vhost_enable_guest_notification(dev, vq, enable);
1455
1456         rte_spinlock_unlock(&vq->access_lock);
1457
1458         return ret;
1459 }
1460
1461 void
1462 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1463 {
1464         struct virtio_net *dev = get_device(vid);
1465
1466         if (dev == NULL)
1467                 return;
1468
1469         vhost_log_write(dev, addr, len);
1470 }
1471
1472 void
1473 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1474                          uint64_t offset, uint64_t len)
1475 {
1476         struct virtio_net *dev;
1477         struct vhost_virtqueue *vq;
1478
1479         dev = get_device(vid);
1480         if (dev == NULL)
1481                 return;
1482
1483         if (vring_idx >= VHOST_MAX_VRING)
1484                 return;
1485         vq = dev->virtqueue[vring_idx];
1486         if (!vq)
1487                 return;
1488
1489         vhost_log_used_vring(dev, vq, offset, len);
1490 }
1491
1492 uint32_t
1493 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1494 {
1495         struct virtio_net *dev;
1496         struct vhost_virtqueue *vq;
1497         uint32_t ret = 0;
1498
1499         dev = get_device(vid);
1500         if (dev == NULL)
1501                 return 0;
1502
1503         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1504                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1505                         dev->ifname, __func__, qid);
1506                 return 0;
1507         }
1508
1509         vq = dev->virtqueue[qid];
1510         if (vq == NULL)
1511                 return 0;
1512
1513         rte_spinlock_lock(&vq->access_lock);
1514
1515         if (unlikely(!vq->enabled || vq->avail == NULL))
1516                 goto out;
1517
1518         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1519
1520 out:
1521         rte_spinlock_unlock(&vq->access_lock);
1522         return ret;
1523 }
1524
1525 struct rte_vdpa_device *
1526 rte_vhost_get_vdpa_device(int vid)
1527 {
1528         struct virtio_net *dev = get_device(vid);
1529
1530         if (dev == NULL)
1531                 return NULL;
1532
1533         return dev->vdpa_dev;
1534 }
1535
1536 int
1537 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1538                 uint64_t *log_size)
1539 {
1540         struct virtio_net *dev = get_device(vid);
1541
1542         if (dev == NULL || log_base == NULL || log_size == NULL)
1543                 return -1;
1544
1545         *log_base = dev->log_base;
1546         *log_size = dev->log_size;
1547
1548         return 0;
1549 }
1550
1551 int
1552 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1553                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1554 {
1555         struct vhost_virtqueue *vq;
1556         struct virtio_net *dev = get_device(vid);
1557
1558         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1559                 return -1;
1560
1561         if (queue_id >= VHOST_MAX_VRING)
1562                 return -1;
1563
1564         vq = dev->virtqueue[queue_id];
1565         if (!vq)
1566                 return -1;
1567
1568         if (vq_is_packed(dev)) {
1569                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1570                                   vq->last_avail_idx;
1571                 *last_used_idx = (vq->used_wrap_counter << 15) |
1572                                  vq->last_used_idx;
1573         } else {
1574                 *last_avail_idx = vq->last_avail_idx;
1575                 *last_used_idx = vq->last_used_idx;
1576         }
1577
1578         return 0;
1579 }
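/*
 * For packed rings, the indexes returned above encode the wrap counter
 * in bit 15: for example 0x8005 means index 5 with the wrap counter set.
 * rte_vhost_set_vring_base() below splits the value back into its two
 * parts.
 */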
1580
1581 int
1582 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1583                 uint16_t last_avail_idx, uint16_t last_used_idx)
1584 {
1585         struct vhost_virtqueue *vq;
1586         struct virtio_net *dev = get_device(vid);
1587
1588         if (!dev)
1589                 return -1;
1590
1591         if (queue_id >= VHOST_MAX_VRING)
1592                 return -1;
1593
1594         vq = dev->virtqueue[queue_id];
1595         if (!vq)
1596                 return -1;
1597
1598         if (vq_is_packed(dev)) {
1599                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1600                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1601                 vq->last_used_idx = last_used_idx & 0x7fff;
1602                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1603         } else {
1604                 vq->last_avail_idx = last_avail_idx;
1605                 vq->last_used_idx = last_used_idx;
1606         }
1607
1608         return 0;
1609 }
1610
1611 int
1612 rte_vhost_get_vring_base_from_inflight(int vid,
1613                                        uint16_t queue_id,
1614                                        uint16_t *last_avail_idx,
1615                                        uint16_t *last_used_idx)
1616 {
1617         struct rte_vhost_inflight_info_packed *inflight_info;
1618         struct vhost_virtqueue *vq;
1619         struct virtio_net *dev = get_device(vid);
1620
1621         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1622                 return -1;
1623
1624         if (queue_id >= VHOST_MAX_VRING)
1625                 return -1;
1626
1627         vq = dev->virtqueue[queue_id];
1628         if (!vq)
1629                 return -1;
1630
1631         if (!vq_is_packed(dev))
1632                 return -1;
1633
1634         inflight_info = vq->inflight_packed;
1635         if (!inflight_info)
1636                 return -1;
1637
1638         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1639                           inflight_info->old_used_idx;
1640         *last_used_idx = *last_avail_idx;
1641
1642         return 0;
1643 }
1644
1645 int
1646 rte_vhost_extern_callback_register(int vid,
1647                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1648 {
1649         struct virtio_net *dev = get_device(vid);
1650
1651         if (dev == NULL || ops == NULL)
1652                 return -1;
1653
1654         dev->extern_ops = *ops;
1655         dev->extern_data = ctx;
1656         return 0;
1657 }
1658
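/*
 * Allocate the per-virtqueue async metadata on the virtqueue's NUMA node:
 * the in-flight packet tracking array, the completion flags and, depending
 * on the ring layout, either the packed buffers array or the split
 * descriptors array. Callers must hold the virtqueue access_lock, except
 * for the thread-unsafe registration variant.
 */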
1659 static __rte_always_inline int
1660 async_channel_register(int vid, uint16_t queue_id)
1661 {
1662         struct virtio_net *dev = get_device(vid);
1663         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1664         struct vhost_async *async;
1665         int node = vq->numa_node;
1666
1667         if (unlikely(vq->async)) {
1668                 VHOST_LOG_CONFIG(ERR,
1669                                 "(%s) async register failed: already registered (qid: %d)\n",
1670                                 dev->ifname, queue_id);
1671                 return -1;
1672         }
1673
1674         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1675         if (!async) {
1676                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1677                                 dev->ifname, queue_id);
1678                 return -1;
1679         }
1680
1681         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1682                         RTE_CACHE_LINE_SIZE, node);
1683         if (!async->pkts_info) {
1684                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1685                                 dev->ifname, queue_id);
1686                 goto out_free_async;
1687         }
1688
1689         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1690                         RTE_CACHE_LINE_SIZE, node);
1691         if (!async->pkts_cmpl_flag) {
1692                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1693                                 dev->ifname, queue_id);
1694                 goto out_free_async;
1695         }
1696
1697         if (vq_is_packed(dev)) {
1698                 async->buffers_packed = rte_malloc_socket(NULL,
1699                                 vq->size * sizeof(struct vring_used_elem_packed),
1700                                 RTE_CACHE_LINE_SIZE, node);
1701                 if (!async->buffers_packed) {
1702                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1703                                         dev->ifname, queue_id);
1704                         goto out_free_inflight;
1705                 }
1706         } else {
1707                 async->descs_split = rte_malloc_socket(NULL,
1708                                 vq->size * sizeof(struct vring_used_elem),
1709                                 RTE_CACHE_LINE_SIZE, node);
1710                 if (!async->descs_split) {
1711                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1712                                         dev->ifname, queue_id);
1713                         goto out_free_inflight;
1714                 }
1715         }
1716
1717         vq->async = async;
1718
1719         return 0;
1720 out_free_inflight:
1721         rte_free(async->pkts_info);
1722 out_free_async:
1723         rte_free(async);
1724
1725         return -1;
1726 }
1727
int
rte_vhost_async_channel_register(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
        int ret;

        if (dev == NULL)
                return -1;

        if (queue_id >= VHOST_MAX_VRING)
                return -1;

        vq = dev->virtqueue[queue_id];

        if (unlikely(vq == NULL || !dev->async_copy))
                return -1;

        rte_spinlock_lock(&vq->access_lock);
        ret = async_channel_register(vid, queue_id);
        rte_spinlock_unlock(&vq->access_lock);

        return ret;
}

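/*
 * Lock-free variant of the registration above: same checks, but
 * vq->access_lock is not taken, so it is only meant for contexts where
 * the caller already owns the virtqueue, e.g. vhost event callbacks.
 */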
int
rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);

        if (dev == NULL)
                return -1;

        if (queue_id >= VHOST_MAX_VRING)
                return -1;

        vq = dev->virtqueue[queue_id];

        if (unlikely(vq == NULL || !dev->async_copy))
                return -1;

        return async_channel_register(vid, queue_id);
}

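/*
 * Tear down the async channel of one virtqueue. A trylock is used so the
 * call never blocks behind the data path: it fails with -1 if the
 * virtqueue is currently busy, and it refuses to release the channel
 * while packets are still in flight on the DMA engine.
 */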
int
rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
        int ret = -1;

        if (dev == NULL)
                return ret;

        if (queue_id >= VHOST_MAX_VRING)
                return ret;

        vq = dev->virtqueue[queue_id];

        if (vq == NULL)
                return ret;

        if (!rte_spinlock_trylock(&vq->access_lock)) {
                VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
                                dev->ifname);
                return ret;
        }

        if (!vq->async) {
                ret = 0;
        } else if (vq->async->pkts_inflight_n) {
                VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
                VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
                        dev->ifname);
        } else {
                vhost_free_async_mem(vq);
                ret = 0;
        }

        rte_spinlock_unlock(&vq->access_lock);

        return ret;
}

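/*
 * Same teardown without taking vq->access_lock; the caller must ensure
 * the virtqueue is not being processed concurrently while this runs.
 */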
int
rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);

        if (dev == NULL)
                return -1;

        if (queue_id >= VHOST_MAX_VRING)
                return -1;

        vq = dev->virtqueue[queue_id];

        if (vq == NULL)
                return -1;

        if (!vq->async)
                return 0;

        if (vq->async->pkts_inflight_n) {
                VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
                VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
                        dev->ifname);
                return -1;
        }

        vhost_free_async_mem(vq);

        return 0;
}

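/*
 * Attach a DMA device/virtual channel pair to the vhost async data path.
 * The per-vchannel completion-flag array is sized to the next power of
 * two of the device's max_desc so ring indexes can be wrapped with a
 * simple mask. A rough usage sketch from an application's setup path
 * (dma_id, vid, queue_id and handle_error() stand in for the
 * application's own code):
 *
 *        if (rte_vhost_async_dma_configure(dma_id, 0) < 0)
 *                handle_error();
 *        if (rte_vhost_async_channel_register(vid, queue_id) < 0)
 *                handle_error();
 */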
int
rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
{
        struct rte_dma_info info;
        void *pkts_cmpl_flag_addr;
        uint16_t max_desc;

        if (!rte_dma_is_valid(dma_id)) {
                VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
                return -1;
        }

        rte_dma_info_get(dma_id, &info);
        if (vchan_id >= info.max_vchans) {
                VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
                return -1;
        }

        if (!dma_copy_track[dma_id].vchans) {
                struct async_dma_vchan_info *vchans;

                vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
                                RTE_CACHE_LINE_SIZE);
                if (vchans == NULL) {
                        VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
                                        dma_id, vchan_id);
                        return -1;
                }

                dma_copy_track[dma_id].vchans = vchans;
        }

        if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
                VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
                                vchan_id);
                return 0;
        }

        max_desc = info.max_desc;
        if (!rte_is_power_of_2(max_desc))
                max_desc = rte_align32pow2(max_desc);

        pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
        if (!pkts_cmpl_flag_addr) {
                VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
                                "vChannel %u.\n", dma_id, vchan_id);

                if (dma_copy_track[dma_id].nr_vchans == 0) {
                        rte_free(dma_copy_track[dma_id].vchans);
                        dma_copy_track[dma_id].vchans = NULL;
                }
                return -1;
        }

        dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
        dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
        dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
        dma_copy_track[dma_id].nr_vchans++;

        return 0;
}

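/*
 * Report how many packets a registered async channel still has in flight
 * on the DMA engine. Returns -1 if the device or virtqueue is invalid,
 * async is not registered, or the virtqueue lock could not be taken.
 */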
int
rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
        int ret = -1;

        if (dev == NULL)
                return ret;

        if (queue_id >= VHOST_MAX_VRING)
                return ret;

        vq = dev->virtqueue[queue_id];

        if (vq == NULL)
                return ret;

        if (!rte_spinlock_trylock(&vq->access_lock)) {
                VHOST_LOG_CONFIG(DEBUG,
                        "(%s) failed to check in-flight packets. virtqueue busy.\n",
                        dev->ifname);
                return ret;
        }

        if (vq->async)
                ret = vq->async->pkts_inflight_n;

        rte_spinlock_unlock(&vq->access_lock);

        return ret;
}

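/*
 * Lock-free variant of the in-flight query, for callers that already own
 * the virtqueue (e.g. inside vhost callbacks).
 */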
int
rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
{
        struct vhost_virtqueue *vq;
        struct virtio_net *dev = get_device(vid);
        int ret = -1;

        if (dev == NULL)
                return ret;

        if (queue_id >= VHOST_MAX_VRING)
                return ret;

        vq = dev->virtqueue[queue_id];

        if (vq == NULL)
                return ret;

        if (!vq->async)
                return ret;

        ret = vq->async->pkts_inflight_n;

        return ret;
}

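/*
 * Fill in the power-monitor condition used by rte_power-style monitoring:
 * for packed rings the next descriptor's flags word is watched for the
 * AVAIL/USED wrap bits; for split rings the avail index is watched until
 * it no longer matches last_avail_idx (pmc->match == 0).
 */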
int
rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
                struct rte_vhost_power_monitor_cond *pmc)
{
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq;

        if (dev == NULL)
                return -1;
        if (queue_id >= VHOST_MAX_VRING)
                return -1;

        vq = dev->virtqueue[queue_id];
        if (vq == NULL)
                return -1;

        if (vq_is_packed(dev)) {
                struct vring_packed_desc *desc;
                desc = vq->desc_packed;
                pmc->addr = &desc[vq->last_avail_idx].flags;
                if (vq->avail_wrap_counter)
                        pmc->val = VRING_DESC_F_AVAIL;
                else
                        pmc->val = VRING_DESC_F_USED;
                pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
                pmc->size = sizeof(desc[vq->last_avail_idx].flags);
                pmc->match = 1;
        } else {
                pmc->addr = &vq->avail->idx;
                pmc->val = vq->last_avail_idx & (vq->size - 1);
                pmc->mask = vq->size - 1;
                pmc->size = sizeof(vq->avail->idx);
                pmc->match = 0;
        }

        return 0;
}

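/*
 * Per-virtqueue statistics API. Counters are only maintained when the
 * device has statistics enabled (VIRTIO_DEV_STATS_ENABLED, set from the
 * RTE_VHOST_USER_NET_STATS_ENABLE registration flag); otherwise these
 * calls return -1. Names combine the direction and queue pair index with
 * the counter name, e.g. "rx_q0_iotlb_misses".
 */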
int
rte_vhost_vring_stats_get_names(int vid, uint16_t queue_id,
                struct rte_vhost_stat_name *name, unsigned int size)
{
        struct virtio_net *dev = get_device(vid);
        unsigned int i;

        if (dev == NULL)
                return -1;

        if (queue_id >= dev->nr_vring)
                return -1;

        if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
                return -1;

        if (name == NULL || size < VHOST_NB_VQ_STATS)
                return VHOST_NB_VQ_STATS;

        for (i = 0; i < VHOST_NB_VQ_STATS; i++)
                snprintf(name[i].name, sizeof(name[i].name), "%s_q%u_%s",
                                (queue_id & 1) ? "rx" : "tx",
                                queue_id / 2, vhost_vq_stat_strings[i].name);

        return VHOST_NB_VQ_STATS;
}

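/*
 * Snapshot the counters of one virtqueue. Values are read under
 * vq->access_lock so a single call returns a consistent set; as with the
 * names query, a too-small array makes the call return the required size.
 */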
int
rte_vhost_vring_stats_get(int vid, uint16_t queue_id,
                struct rte_vhost_stat *stats, unsigned int n)
{
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq;
        unsigned int i;

        if (dev == NULL)
                return -1;

        if (queue_id >= dev->nr_vring)
                return -1;

        if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
                return -1;

        if (stats == NULL || n < VHOST_NB_VQ_STATS)
                return VHOST_NB_VQ_STATS;

        vq = dev->virtqueue[queue_id];

        rte_spinlock_lock(&vq->access_lock);
        for (i = 0; i < VHOST_NB_VQ_STATS; i++) {
                stats[i].value =
                        *(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset);
                stats[i].id = i;
        }
        rte_spinlock_unlock(&vq->access_lock);

        return VHOST_NB_VQ_STATS;
}

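/*
 * Reset all counters of one virtqueue to zero, under vq->access_lock.
 */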
int
rte_vhost_vring_stats_reset(int vid, uint16_t queue_id)
{
        struct virtio_net *dev = get_device(vid);
        struct vhost_virtqueue *vq;

        if (dev == NULL)
                return -1;

        if (queue_id >= dev->nr_vring)
                return -1;

        if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
                return -1;

        vq = dev->virtqueue[queue_id];

        rte_spinlock_lock(&vq->access_lock);
        memset(&vq->stats, 0, sizeof(vq->stats));
        rte_spinlock_unlock(&vq->access_lock);

        return 0;
}

RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);