vhost: add vDPA operations for block device
[dpdk.git] / lib/vhost/vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
24 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
25 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
27 struct vhost_vq_stats_name_off {
28         char name[RTE_VHOST_STATS_NAME_SIZE];
29         unsigned int offset;
30 };
31
32 static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
33         {"good_packets",           offsetof(struct vhost_virtqueue, stats.packets)},
34         {"good_bytes",             offsetof(struct vhost_virtqueue, stats.bytes)},
35         {"multicast_packets",      offsetof(struct vhost_virtqueue, stats.multicast)},
36         {"broadcast_packets",      offsetof(struct vhost_virtqueue, stats.broadcast)},
37         {"undersize_packets",      offsetof(struct vhost_virtqueue, stats.size_bins[0])},
38         {"size_64_packets",        offsetof(struct vhost_virtqueue, stats.size_bins[1])},
39         {"size_65_127_packets",    offsetof(struct vhost_virtqueue, stats.size_bins[2])},
40         {"size_128_255_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[3])},
41         {"size_256_511_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[4])},
42         {"size_512_1023_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[5])},
43         {"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
44         {"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
45         {"guest_notifications",    offsetof(struct vhost_virtqueue, stats.guest_notifications)},
46         {"iotlb_hits",             offsetof(struct vhost_virtqueue, stats.iotlb_hits)},
47         {"iotlb_misses",           offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
48         {"inflight_submitted",     offsetof(struct vhost_virtqueue, stats.inflight_submitted)},
49         {"inflight_completed",     offsetof(struct vhost_virtqueue, stats.inflight_completed)},
50 };
51
52 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
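/*
 * A minimal sketch (illustrative helper, not an exported API) of how the
 * name/offset table above is meant to be consumed: each counter is read by
 * adding the recorded offset to the virtqueue base pointer, so a single loop
 * can export all VHOST_NB_VQ_STATS counters by name.
 */
static inline uint64_t
vhost_vq_stat_read(const struct vhost_virtqueue *vq, unsigned int idx)
{
        /* idx is assumed to be < VHOST_NB_VQ_STATS */
        return *(const uint64_t *)
                ((const char *)vq + vhost_vq_stat_strings[idx].offset);
}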
53
54 /* Called with iotlb_lock read-locked */
55 uint64_t
56 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
57                     uint64_t iova, uint64_t *size, uint8_t perm)
58 {
59         uint64_t vva, tmp_size;
60
61         if (unlikely(!*size))
62                 return 0;
63
64         tmp_size = *size;
65
66         vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
67         if (tmp_size == *size) {
68                 if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
69                         vq->stats.iotlb_hits++;
70                 return vva;
71         }
72
73         if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
74                 vq->stats.iotlb_misses++;
75
76         iova += tmp_size;
77
78         if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
79                 /*
80                  * iotlb_lock is read-locked for a full burst,
81                  * but it only protects the iotlb cache.
82                  * In case of IOTLB miss, we might block on the socket,
83                  * which could cause a deadlock with QEMU if an IOTLB update
84                  * is being handled. We can safely unlock here to avoid it.
85                  */
86                 vhost_user_iotlb_rd_unlock(vq);
87
88                 vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
89                 if (vhost_user_iotlb_miss(dev, iova, perm)) {
90                         VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
91                                 dev->ifname, iova);
92                         vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
93                 }
94
95                 vhost_user_iotlb_rd_lock(vq);
96         }
97
98         return 0;
99 }
100
101 #define VHOST_LOG_PAGE  4096
102
103 /*
104  * Atomically set a bit in memory.
105  */
106 static __rte_always_inline void
107 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
108 {
109 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
110         /*
111          * __sync_ built-ins are deprecated, but __atomic_ ones
112          * are not as well optimized in older GCC versions.
113          */
114         __sync_fetch_and_or_1(addr, (1U << nr));
115 #else
116         __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
117 #endif
118 }
119
120 static __rte_always_inline void
121 vhost_log_page(uint8_t *log_base, uint64_t page)
122 {
123         vhost_set_bit(page % 8, &log_base[page / 8]);
124 }
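/*
 * Worked example of the bitmap math above (values chosen for illustration):
 * a write at guest physical address 0x11000 dirties page
 * 0x11000 / VHOST_LOG_PAGE = 17, which sets bit 17 % 8 = 1 of byte
 * 17 / 8 = 2 in the dirty log.
 */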
125
126 void
127 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
128 {
129         uint64_t page;
130
131         if (unlikely(!dev->log_base || !len))
132                 return;
133
134         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
135                 return;
136
137         /* To make sure guest memory updates are committed before logging */
138         rte_atomic_thread_fence(__ATOMIC_RELEASE);
139
140         page = addr / VHOST_LOG_PAGE;
141         while (page * VHOST_LOG_PAGE < addr + len) {
142                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
143                 page += 1;
144         }
145 }
146
147 void
148 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
149                              uint64_t iova, uint64_t len)
150 {
151         uint64_t hva, gpa, map_len;
152         map_len = len;
153
154         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
155         if (map_len != len) {
156                 VHOST_LOG_DATA(ERR,
157                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
158                         dev->ifname, iova);
159                 return;
160         }
161
162         gpa = hva_to_gpa(dev, hva, len);
163         if (gpa)
164                 __vhost_log_write(dev, gpa, len);
165 }
166
167 void
168 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
169 {
170         unsigned long *log_base;
171         int i;
172
173         if (unlikely(!dev->log_base))
174                 return;
175
176         /* No cache, nothing to sync */
177         if (unlikely(!vq->log_cache))
178                 return;
179
180         rte_atomic_thread_fence(__ATOMIC_RELEASE);
181
182         log_base = (unsigned long *)(uintptr_t)dev->log_base;
183
184         for (i = 0; i < vq->log_cache_nb_elem; i++) {
185                 struct log_cache_entry *elem = vq->log_cache + i;
186
187 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
188                 /*
189                  * '__sync' builtins are deprecated, but '__atomic' ones
190                  * are not as well optimized in older GCC versions.
191                  */
192                 __sync_fetch_and_or(log_base + elem->offset, elem->val);
193 #else
194                 __atomic_fetch_or(log_base + elem->offset, elem->val,
195                                 __ATOMIC_RELAXED);
196 #endif
197         }
198
199         rte_atomic_thread_fence(__ATOMIC_RELEASE);
200
201         vq->log_cache_nb_elem = 0;
202 }
203
204 static __rte_always_inline void
205 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
206                         uint64_t page)
207 {
208         uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
209         uint32_t offset = page / (sizeof(unsigned long) << 3);
210         int i;
211
212         if (unlikely(!vq->log_cache)) {
213                 /* No logging cache allocated, write dirty log map directly */
214                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
215                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
216
217                 return;
218         }
219
220         for (i = 0; i < vq->log_cache_nb_elem; i++) {
221                 struct log_cache_entry *elem = vq->log_cache + i;
222
223                 if (elem->offset == offset) {
224                         elem->val |= (1UL << bit_nr);
225                         return;
226                 }
227         }
228
229         if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
230                 /*
231                  * No more room for a new log cache entry,
232                  * so write the dirty log map directly.
233                  */
234                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
235                 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
236
237                 return;
238         }
239
240         vq->log_cache[i].offset = offset;
241         vq->log_cache[i].val = (1UL << bit_nr);
242         vq->log_cache_nb_elem++;
243 }
244
245 void
246 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
247                         uint64_t addr, uint64_t len)
248 {
249         uint64_t page;
250
251         if (unlikely(!dev->log_base || !len))
252                 return;
253
254         if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
255                 return;
256
257         page = addr / VHOST_LOG_PAGE;
258         while (page * VHOST_LOG_PAGE < addr + len) {
259                 vhost_log_cache_page(dev, vq, page);
260                 page += 1;
261         }
262 }
263
264 void
265 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
266                              uint64_t iova, uint64_t len)
267 {
268         uint64_t hva, gpa, map_len;
269         map_len = len;
270
271         hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
272         if (map_len != len) {
273                 VHOST_LOG_DATA(ERR,
274                         "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
275                         dev->ifname, iova);
276                 return;
277         }
278
279         gpa = hva_to_gpa(dev, hva, len);
280         if (gpa)
281                 __vhost_log_cache_write(dev, vq, gpa, len);
282 }
283
284 void *
285 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
286                 uint64_t desc_addr, uint64_t desc_len)
287 {
288         void *idesc;
289         uint64_t src, dst;
290         uint64_t len, remain = desc_len;
291
292         idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
293         if (unlikely(!idesc))
294                 return NULL;
295
296         dst = (uint64_t)(uintptr_t)idesc;
297
298         while (remain) {
299                 len = remain;
300                 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
301                                 VHOST_ACCESS_RO);
302                 if (unlikely(!src || !len)) {
303                         rte_free(idesc);
304                         return NULL;
305                 }
306
307                 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
308
309                 remain -= len;
310                 dst += len;
311                 desc_addr += len;
312         }
313
314         return idesc;
315 }
316
317 void
318 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
319 {
320         if ((vq->callfd >= 0) && (destroy != 0))
321                 close(vq->callfd);
322         if (vq->kickfd >= 0)
323                 close(vq->kickfd);
324 }
325
326 void
327 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
328 {
329         if (!(dev->protocol_features &
330             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
331                 return;
332
333         if (vq_is_packed(dev)) {
334                 if (vq->inflight_packed)
335                         vq->inflight_packed = NULL;
336         } else {
337                 if (vq->inflight_split)
338                         vq->inflight_split = NULL;
339         }
340
341         if (vq->resubmit_inflight) {
342                 if (vq->resubmit_inflight->resubmit_list) {
343                         rte_free(vq->resubmit_inflight->resubmit_list);
344                         vq->resubmit_inflight->resubmit_list = NULL;
345                 }
346                 rte_free(vq->resubmit_inflight);
347                 vq->resubmit_inflight = NULL;
348         }
349 }
350
351 /*
352  * Unmap any memory, close any file descriptors and
353  * free any memory owned by a device.
354  */
355 void
356 cleanup_device(struct virtio_net *dev, int destroy)
357 {
358         uint32_t i;
359
360         vhost_backend_cleanup(dev);
361
362         for (i = 0; i < dev->nr_vring; i++) {
363                 cleanup_vq(dev->virtqueue[i], destroy);
364                 cleanup_vq_inflight(dev, dev->virtqueue[i]);
365         }
366 }
367
368 static void
369 vhost_free_async_mem(struct vhost_virtqueue *vq)
370 {
371         if (!vq->async)
372                 return;
373
374         rte_free(vq->async->pkts_info);
375         rte_free(vq->async->pkts_cmpl_flag);
376
377         rte_free(vq->async->buffers_packed);
378         vq->async->buffers_packed = NULL;
379         rte_free(vq->async->descs_split);
380         vq->async->descs_split = NULL;
381
382         rte_free(vq->async);
383         vq->async = NULL;
384 }
385
386 void
387 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
388 {
389         if (vq_is_packed(dev))
390                 rte_free(vq->shadow_used_packed);
391         else
392                 rte_free(vq->shadow_used_split);
393
394         vhost_free_async_mem(vq);
395         rte_free(vq->batch_copy_elems);
396         rte_mempool_free(vq->iotlb_pool);
397         rte_free(vq->log_cache);
398         rte_free(vq);
399 }
400
401 /*
402  * Release virtqueues and device memory.
403  */
404 static void
405 free_device(struct virtio_net *dev)
406 {
407         uint32_t i;
408
409         for (i = 0; i < dev->nr_vring; i++)
410                 free_vq(dev, dev->virtqueue[i]);
411
412         rte_free(dev);
413 }
414
415 static __rte_always_inline int
416 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
417 {
418         if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
419                 return 0;
420
421         vq->log_guest_addr = translate_log_addr(dev, vq,
422                                                 vq->ring_addrs.log_guest_addr);
423         if (vq->log_guest_addr == 0)
424                 return -1;
425
426         return 0;
427 }
428
429 /*
430  * Converts vring log address to GPA
431  * If IOMMU is enabled, the log address is IOVA
432  * If IOMMU not enabled, the log address is already GPA
433  * If IOMMU is not enabled, the log address is already GPA
434  * Caller should have iotlb_lock read-locked
435  */
436 uint64_t
437 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
438                 uint64_t log_addr)
439 {
440         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
441                 const uint64_t exp_size = sizeof(uint64_t);
442                 uint64_t hva, gpa;
443                 uint64_t size = exp_size;
444
445                 hva = vhost_iova_to_vva(dev, vq, log_addr,
446                                         &size, VHOST_ACCESS_RW);
447
448                 if (size != exp_size)
449                         return 0;
450
451                 gpa = hva_to_gpa(dev, hva, exp_size);
452                 if (!gpa) {
453                         VHOST_LOG_DATA(ERR,
454                                 "(%s) failed to find GPA for log_addr: 0x%"
455                                 PRIx64 " hva: 0x%" PRIx64 "\n",
456                                 dev->ifname, log_addr, hva);
457                         return 0;
458                 }
459                 return gpa;
460
461         } else
462                 return log_addr;
463 }
464
465 /* Caller should have iotlb_lock read-locked */
466 static int
467 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
468 {
469         uint64_t req_size, size;
470
471         req_size = sizeof(struct vring_desc) * vq->size;
472         size = req_size;
473         vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
474                                                 vq->ring_addrs.desc_user_addr,
475                                                 &size, VHOST_ACCESS_RW);
476         if (!vq->desc || size != req_size)
477                 return -1;
478
479         req_size = sizeof(struct vring_avail);
480         req_size += sizeof(uint16_t) * vq->size;
481         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
482                 req_size += sizeof(uint16_t);
483         size = req_size;
484         vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
485                                                 vq->ring_addrs.avail_user_addr,
486                                                 &size, VHOST_ACCESS_RW);
487         if (!vq->avail || size != req_size)
488                 return -1;
489
490         req_size = sizeof(struct vring_used);
491         req_size += sizeof(struct vring_used_elem) * vq->size;
492         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
493                 req_size += sizeof(uint16_t);
494         size = req_size;
495         vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
496                                                 vq->ring_addrs.used_user_addr,
497                                                 &size, VHOST_ACCESS_RW);
498         if (!vq->used || size != req_size)
499                 return -1;
500
501         return 0;
502 }
503
504 /* Caller should have iotlb_lock read-locked */
505 static int
506 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
507 {
508         uint64_t req_size, size;
509
510         req_size = sizeof(struct vring_packed_desc) * vq->size;
511         size = req_size;
512         vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
513                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
514                                 &size, VHOST_ACCESS_RW);
515         if (!vq->desc_packed || size != req_size)
516                 return -1;
517
518         req_size = sizeof(struct vring_packed_desc_event);
519         size = req_size;
520         vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
521                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
522                                 &size, VHOST_ACCESS_RW);
523         if (!vq->driver_event || size != req_size)
524                 return -1;
525
526         req_size = sizeof(struct vring_packed_desc_event);
527         size = req_size;
528         vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
529                 vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
530                                 &size, VHOST_ACCESS_RW);
531         if (!vq->device_event || size != req_size)
532                 return -1;
533
534         return 0;
535 }
536
537 int
538 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
539 {
540
541         if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
542                 return -1;
543
544         if (vq_is_packed(dev)) {
545                 if (vring_translate_packed(dev, vq) < 0)
546                         return -1;
547         } else {
548                 if (vring_translate_split(dev, vq) < 0)
549                         return -1;
550         }
551
552         if (log_translate(dev, vq) < 0)
553                 return -1;
554
555         vq->access_ok = true;
556
557         return 0;
558 }
559
560 void
561 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
562 {
563         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
564                 vhost_user_iotlb_wr_lock(vq);
565
566         vq->access_ok = false;
567         vq->desc = NULL;
568         vq->avail = NULL;
569         vq->used = NULL;
570         vq->log_guest_addr = 0;
571
572         if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
573                 vhost_user_iotlb_wr_unlock(vq);
574 }
575
576 static void
577 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
578 {
579         struct vhost_virtqueue *vq;
580         int numa_node = SOCKET_ID_ANY;
581
582         if (vring_idx >= VHOST_MAX_VRING) {
583                 VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
584                                 dev->ifname, vring_idx);
585                 return;
586         }
587
588         vq = dev->virtqueue[vring_idx];
589         if (!vq) {
590                 VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
591                                 dev->ifname, vring_idx);
592                 return;
593         }
594
595         memset(vq, 0, sizeof(struct vhost_virtqueue));
596
597         vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
598         vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
599         vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
600
601 #ifdef RTE_LIBRTE_VHOST_NUMA
602         if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
603                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
604                         dev->ifname, rte_strerror(errno));
605                 numa_node = SOCKET_ID_ANY;
606         }
607 #endif
608         vq->numa_node = numa_node;
609
610         vhost_user_iotlb_init(dev, vring_idx);
611 }
612
613 static void
614 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
615 {
616         struct vhost_virtqueue *vq;
617         int callfd;
618
619         if (vring_idx >= VHOST_MAX_VRING) {
620                 VHOST_LOG_CONFIG(ERR,
621                                 "(%s) failed to reset vring, out of bound (%d)\n",
622                                 dev->ifname, vring_idx);
623                 return;
624         }
625
626         vq = dev->virtqueue[vring_idx];
627         if (!vq) {
628                 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
629                                 dev->ifname, vring_idx);
630                 return;
631         }
632
633         callfd = vq->callfd;
634         init_vring_queue(dev, vring_idx);
635         vq->callfd = callfd;
636 }
637
638 int
639 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
640 {
641         struct vhost_virtqueue *vq;
642         uint32_t i;
643
644         /* Also allocate holes, if any, up to requested vring index. */
645         for (i = 0; i <= vring_idx; i++) {
646                 if (dev->virtqueue[i])
647                         continue;
648
649                 vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
650                 if (vq == NULL) {
651                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
652                                         dev->ifname, i);
653                         return -1;
654                 }
655
656                 dev->virtqueue[i] = vq;
657                 init_vring_queue(dev, i);
658                 rte_spinlock_init(&vq->access_lock);
659                 vq->avail_wrap_counter = 1;
660                 vq->used_wrap_counter = 1;
661                 vq->signalled_used_valid = false;
662         }
663
664         dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
665
666         return 0;
667 }
668
669 /*
670  * Reset some variables in the device structure, while keeping a few
671  * others untouched, such as vid, ifname and nr_vring: they
672  * should stay the same unless the device is removed.
673  */
674 void
675 reset_device(struct virtio_net *dev)
676 {
677         uint32_t i;
678
679         dev->features = 0;
680         dev->protocol_features = 0;
681         dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
682
683         for (i = 0; i < dev->nr_vring; i++)
684                 reset_vring_queue(dev, i);
685 }
686
687 /*
688  * Invoked when a new vhost-user connection is established (i.e. when
689  * a new virtio device is being attached).
690  */
691 int
692 vhost_new_device(void)
693 {
694         struct virtio_net *dev;
695         int i;
696
697         pthread_mutex_lock(&vhost_dev_lock);
698         for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
699                 if (vhost_devices[i] == NULL)
700                         break;
701         }
702
703         if (i == RTE_MAX_VHOST_DEVICE) {
704                 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
705                 pthread_mutex_unlock(&vhost_dev_lock);
706                 return -1;
707         }
708
709         dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
710         if (dev == NULL) {
711                 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
712                 pthread_mutex_unlock(&vhost_dev_lock);
713                 return -1;
714         }
715
716         vhost_devices[i] = dev;
717         pthread_mutex_unlock(&vhost_dev_lock);
718
719         dev->vid = i;
720         dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
721         dev->slave_req_fd = -1;
722         dev->postcopy_ufd = -1;
723         rte_spinlock_init(&dev->slave_req_lock);
724
725         return i;
726 }
727
728 void
729 vhost_destroy_device_notify(struct virtio_net *dev)
730 {
731         struct rte_vdpa_device *vdpa_dev;
732
733         if (dev->flags & VIRTIO_DEV_RUNNING) {
734                 vdpa_dev = dev->vdpa_dev;
735                 if (vdpa_dev)
736                         vdpa_dev->ops->dev_close(dev->vid);
737                 dev->flags &= ~VIRTIO_DEV_RUNNING;
738                 dev->notify_ops->destroy_device(dev->vid);
739         }
740 }
741
742 /*
743  * Invoked when the vhost-user connection is broken (i.e. when
744  * the virtio device is being detached).
745  */
746 void
747 vhost_destroy_device(int vid)
748 {
749         struct virtio_net *dev = get_device(vid);
750
751         if (dev == NULL)
752                 return;
753
754         vhost_destroy_device_notify(dev);
755
756         cleanup_device(dev, 1);
757         free_device(dev);
758
759         vhost_devices[vid] = NULL;
760 }
761
762 void
763 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
764 {
765         struct virtio_net *dev = get_device(vid);
766
767         if (dev == NULL)
768                 return;
769
770         dev->vdpa_dev = vdpa_dev;
771 }
772
773 void
774 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
775 {
776         struct virtio_net *dev;
777         unsigned int len;
778
779         dev = get_device(vid);
780         if (dev == NULL)
781                 return;
782
783         len = if_len > sizeof(dev->ifname) ?
784                 sizeof(dev->ifname) : if_len;
785
786         strncpy(dev->ifname, if_name, len);
787         dev->ifname[sizeof(dev->ifname) - 1] = '\0';
788 }
789
790 void
791 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags, bool stats_enabled)
792 {
793         struct virtio_net *dev = get_device(vid);
794
795         if (dev == NULL)
796                 return;
797
798         if (enable)
799                 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
800         else
801                 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
802         if (!compliant_ol_flags)
803                 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
804         else
805                 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
806         if (stats_enabled)
807                 dev->flags |= VIRTIO_DEV_STATS_ENABLED;
808         else
809                 dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
810 }
811
812 void
813 vhost_enable_extbuf(int vid)
814 {
815         struct virtio_net *dev = get_device(vid);
816
817         if (dev == NULL)
818                 return;
819
820         dev->extbuf = 1;
821 }
822
823 void
824 vhost_enable_linearbuf(int vid)
825 {
826         struct virtio_net *dev = get_device(vid);
827
828         if (dev == NULL)
829                 return;
830
831         dev->linearbuf = 1;
832 }
833
834 int
835 rte_vhost_get_mtu(int vid, uint16_t *mtu)
836 {
837         struct virtio_net *dev = get_device(vid);
838
839         if (dev == NULL || mtu == NULL)
840                 return -ENODEV;
841
842         if (!(dev->flags & VIRTIO_DEV_READY))
843                 return -EAGAIN;
844
845         if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
846                 return -ENOTSUP;
847
848         *mtu = dev->mtu;
849
850         return 0;
851 }
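/*
 * A minimal usage sketch for the MTU query above (the wrapper name is
 * illustrative): it maps the return values documented in the function,
 * including -EAGAIN when the device is not ready yet.
 */
static int
example_query_mtu(int vid)
{
        uint16_t mtu = 0;
        int ret;

        ret = rte_vhost_get_mtu(vid, &mtu);
        if (ret == -EAGAIN) {
                /* device exists but is not ready yet: try again later */
                return 0;
        }
        if (ret < 0) {
                /* -ENODEV: no such device, -ENOTSUP: MTU not negotiated */
                return ret;
        }

        VHOST_LOG_CONFIG(INFO, "vid %d negotiated MTU %d\n", vid, mtu);
        return 0;
}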
852
853 int
854 rte_vhost_get_numa_node(int vid)
855 {
856 #ifdef RTE_LIBRTE_VHOST_NUMA
857         struct virtio_net *dev = get_device(vid);
858         int numa_node;
859         int ret;
860
861         if (dev == NULL || numa_available() != 0)
862                 return -1;
863
864         ret = get_mempolicy(&numa_node, NULL, 0, dev,
865                             MPOL_F_NODE | MPOL_F_ADDR);
866         if (ret < 0) {
867                 VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
868                         dev->ifname, rte_strerror(errno));
869                 return -1;
870         }
871
872         return numa_node;
873 #else
874         RTE_SET_USED(vid);
875         return -1;
876 #endif
877 }
878
879 uint32_t
880 rte_vhost_get_queue_num(int vid)
881 {
882         struct virtio_net *dev = get_device(vid);
883
884         if (dev == NULL)
885                 return 0;
886
887         return dev->nr_vring / 2;
888 }
889
890 uint16_t
891 rte_vhost_get_vring_num(int vid)
892 {
893         struct virtio_net *dev = get_device(vid);
894
895         if (dev == NULL)
896                 return 0;
897
898         return dev->nr_vring;
899 }
900
901 int
902 rte_vhost_get_ifname(int vid, char *buf, size_t len)
903 {
904         struct virtio_net *dev = get_device(vid);
905
906         if (dev == NULL || buf == NULL)
907                 return -1;
908
909         len = RTE_MIN(len, sizeof(dev->ifname));
910
911         strncpy(buf, dev->ifname, len);
912         buf[len - 1] = '\0';
913
914         return 0;
915 }
916
917 int
918 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
919 {
920         struct virtio_net *dev;
921
922         dev = get_device(vid);
923         if (dev == NULL || features == NULL)
924                 return -1;
925
926         *features = dev->features;
927         return 0;
928 }
929
930 int
931 rte_vhost_get_negotiated_protocol_features(int vid,
932                                            uint64_t *protocol_features)
933 {
934         struct virtio_net *dev;
935
936         dev = get_device(vid);
937         if (dev == NULL || protocol_features == NULL)
938                 return -1;
939
940         *protocol_features = dev->protocol_features;
941         return 0;
942 }
943
944 int
945 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
946 {
947         struct virtio_net *dev;
948         struct rte_vhost_memory *m;
949         size_t size;
950
951         dev = get_device(vid);
952         if (dev == NULL || mem == NULL)
953                 return -1;
954
955         size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
956         m = malloc(sizeof(struct rte_vhost_memory) + size);
957         if (!m)
958                 return -1;
959
960         m->nregions = dev->mem->nregions;
961         memcpy(m->regions, dev->mem->regions, size);
962         *mem = m;
963
964         return 0;
965 }
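/*
 * A usage sketch for the memory-table query above. The returned structure
 * is allocated with malloc(), so the caller owns it and must free() it; the
 * loop below only reads the region fields defined in rte_vhost.h. The
 * function name is illustrative.
 */
static uint64_t
example_guest_mem_size(int vid)
{
        struct rte_vhost_memory *mem = NULL;
        uint64_t total = 0;
        uint32_t i;

        if (rte_vhost_get_mem_table(vid, &mem) < 0)
                return 0;

        for (i = 0; i < mem->nregions; i++)
                total += mem->regions[i].size;

        free(mem);
        return total;
}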
966
967 int
968 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
969                           struct rte_vhost_vring *vring)
970 {
971         struct virtio_net *dev;
972         struct vhost_virtqueue *vq;
973
974         dev = get_device(vid);
975         if (dev == NULL || vring == NULL)
976                 return -1;
977
978         if (vring_idx >= VHOST_MAX_VRING)
979                 return -1;
980
981         vq = dev->virtqueue[vring_idx];
982         if (!vq)
983                 return -1;
984
985         if (vq_is_packed(dev)) {
986                 vring->desc_packed = vq->desc_packed;
987                 vring->driver_event = vq->driver_event;
988                 vring->device_event = vq->device_event;
989         } else {
990                 vring->desc = vq->desc;
991                 vring->avail = vq->avail;
992                 vring->used = vq->used;
993         }
994         vring->log_guest_addr  = vq->log_guest_addr;
995
996         vring->callfd  = vq->callfd;
997         vring->kickfd  = vq->kickfd;
998         vring->size    = vq->size;
999
1000         return 0;
1001 }
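/*
 * A sketch of how a backend might inspect a vring it just obtained through
 * the function above; only the fields common to split and packed layouts
 * are touched. The function name is illustrative.
 */
static int
example_dump_vring(int vid, uint16_t vring_idx)
{
        struct rte_vhost_vring vring;

        if (rte_vhost_get_vhost_vring(vid, vring_idx, &vring) < 0)
                return -1;

        VHOST_LOG_CONFIG(INFO, "vring %d: size %d, kickfd %d, callfd %d\n",
                        vring_idx, vring.size, vring.kickfd, vring.callfd);
        return 0;
}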
1002
1003 int
1004 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
1005                                   struct rte_vhost_ring_inflight *vring)
1006 {
1007         struct virtio_net *dev;
1008         struct vhost_virtqueue *vq;
1009
1010         dev = get_device(vid);
1011         if (unlikely(!dev))
1012                 return -1;
1013
1014         if (vring_idx >= VHOST_MAX_VRING)
1015                 return -1;
1016
1017         vq = dev->virtqueue[vring_idx];
1018         if (unlikely(!vq))
1019                 return -1;
1020
1021         if (vq_is_packed(dev)) {
1022                 if (unlikely(!vq->inflight_packed))
1023                         return -1;
1024
1025                 vring->inflight_packed = vq->inflight_packed;
1026         } else {
1027                 if (unlikely(!vq->inflight_split))
1028                         return -1;
1029
1030                 vring->inflight_split = vq->inflight_split;
1031         }
1032
1033         vring->resubmit_inflight = vq->resubmit_inflight;
1034
1035         return 0;
1036 }
1037
1038 int
1039 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1040                                   uint16_t idx)
1041 {
1042         struct vhost_virtqueue *vq;
1043         struct virtio_net *dev;
1044
1045         dev = get_device(vid);
1046         if (unlikely(!dev))
1047                 return -1;
1048
1049         if (unlikely(!(dev->protocol_features &
1050             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1051                 return 0;
1052
1053         if (unlikely(vq_is_packed(dev)))
1054                 return -1;
1055
1056         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1057                 return -1;
1058
1059         vq = dev->virtqueue[vring_idx];
1060         if (unlikely(!vq))
1061                 return -1;
1062
1063         if (unlikely(!vq->inflight_split))
1064                 return -1;
1065
1066         if (unlikely(idx >= vq->size))
1067                 return -1;
1068
1069         vq->inflight_split->desc[idx].counter = vq->global_counter++;
1070         vq->inflight_split->desc[idx].inflight = 1;
1071         return 0;
1072 }
1073
1074 int
1075 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1076                                    uint16_t head, uint16_t last,
1077                                    uint16_t *inflight_entry)
1078 {
1079         struct rte_vhost_inflight_info_packed *inflight_info;
1080         struct virtio_net *dev;
1081         struct vhost_virtqueue *vq;
1082         struct vring_packed_desc *desc;
1083         uint16_t old_free_head, free_head;
1084
1085         dev = get_device(vid);
1086         if (unlikely(!dev))
1087                 return -1;
1088
1089         if (unlikely(!(dev->protocol_features &
1090             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1091                 return 0;
1092
1093         if (unlikely(!vq_is_packed(dev)))
1094                 return -1;
1095
1096         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1097                 return -1;
1098
1099         vq = dev->virtqueue[vring_idx];
1100         if (unlikely(!vq))
1101                 return -1;
1102
1103         inflight_info = vq->inflight_packed;
1104         if (unlikely(!inflight_info))
1105                 return -1;
1106
1107         if (unlikely(head >= vq->size))
1108                 return -1;
1109
1110         desc = vq->desc_packed;
1111         old_free_head = inflight_info->old_free_head;
1112         if (unlikely(old_free_head >= vq->size))
1113                 return -1;
1114
1115         free_head = old_free_head;
1116
1117         /* init header descriptor */
1118         inflight_info->desc[old_free_head].num = 0;
1119         inflight_info->desc[old_free_head].counter = vq->global_counter++;
1120         inflight_info->desc[old_free_head].inflight = 1;
1121
1122         /* save the descriptor entries into the inflight entries */
1123         while (head != ((last + 1) % vq->size)) {
1124                 inflight_info->desc[old_free_head].num++;
1125                 inflight_info->desc[free_head].addr = desc[head].addr;
1126                 inflight_info->desc[free_head].len = desc[head].len;
1127                 inflight_info->desc[free_head].flags = desc[head].flags;
1128                 inflight_info->desc[free_head].id = desc[head].id;
1129
1130                 inflight_info->desc[old_free_head].last = free_head;
1131                 free_head = inflight_info->desc[free_head].next;
1132                 inflight_info->free_head = free_head;
1133                 head = (head + 1) % vq->size;
1134         }
1135
1136         inflight_info->old_free_head = free_head;
1137         *inflight_entry = old_free_head;
1138
1139         return 0;
1140 }
1141
1142 int
1143 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1144                                   uint16_t last_used_idx, uint16_t idx)
1145 {
1146         struct virtio_net *dev;
1147         struct vhost_virtqueue *vq;
1148
1149         dev = get_device(vid);
1150         if (unlikely(!dev))
1151                 return -1;
1152
1153         if (unlikely(!(dev->protocol_features &
1154             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1155                 return 0;
1156
1157         if (unlikely(vq_is_packed(dev)))
1158                 return -1;
1159
1160         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1161                 return -1;
1162
1163         vq = dev->virtqueue[vring_idx];
1164         if (unlikely(!vq))
1165                 return -1;
1166
1167         if (unlikely(!vq->inflight_split))
1168                 return -1;
1169
1170         if (unlikely(idx >= vq->size))
1171                 return -1;
1172
1173         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1174
1175         vq->inflight_split->desc[idx].inflight = 0;
1176
1177         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1178
1179         vq->inflight_split->used_idx = last_used_idx;
1180         return 0;
1181 }
1182
1183 int
1184 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1185                                    uint16_t head)
1186 {
1187         struct rte_vhost_inflight_info_packed *inflight_info;
1188         struct virtio_net *dev;
1189         struct vhost_virtqueue *vq;
1190
1191         dev = get_device(vid);
1192         if (unlikely(!dev))
1193                 return -1;
1194
1195         if (unlikely(!(dev->protocol_features &
1196             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1197                 return 0;
1198
1199         if (unlikely(!vq_is_packed(dev)))
1200                 return -1;
1201
1202         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1203                 return -1;
1204
1205         vq = dev->virtqueue[vring_idx];
1206         if (unlikely(!vq))
1207                 return -1;
1208
1209         inflight_info = vq->inflight_packed;
1210         if (unlikely(!inflight_info))
1211                 return -1;
1212
1213         if (unlikely(head >= vq->size))
1214                 return -1;
1215
1216         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1217
1218         inflight_info->desc[head].inflight = 0;
1219
1220         rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1221
1222         inflight_info->old_free_head = inflight_info->free_head;
1223         inflight_info->old_used_idx = inflight_info->used_idx;
1224         inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1225
1226         return 0;
1227 }
1228
1229 int
1230 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1231                                      uint16_t idx)
1232 {
1233         struct virtio_net *dev;
1234         struct vhost_virtqueue *vq;
1235
1236         dev = get_device(vid);
1237         if (unlikely(!dev))
1238                 return -1;
1239
1240         if (unlikely(!(dev->protocol_features &
1241             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1242                 return 0;
1243
1244         if (unlikely(vq_is_packed(dev)))
1245                 return -1;
1246
1247         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1248                 return -1;
1249
1250         vq = dev->virtqueue[vring_idx];
1251         if (unlikely(!vq))
1252                 return -1;
1253
1254         if (unlikely(!vq->inflight_split))
1255                 return -1;
1256
1257         if (unlikely(idx >= vq->size))
1258                 return -1;
1259
1260         vq->inflight_split->last_inflight_io = idx;
1261         return 0;
1262 }
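/*
 * A sketch of the split-ring inflight sequence built on the three functions
 * above (the ordering follows the vhost_blk example application): mark a
 * descriptor inflight when it is fetched from the avail ring, record it as
 * the last in-progress I/O before updating the used ring, then clear it once
 * the new used index has been published. Error handling is omitted.
 */
static void
example_inflight_split_cycle(int vid, uint16_t qid, uint16_t desc_idx,
                uint16_t new_used_idx)
{
        /* descriptor fetched from the avail ring */
        rte_vhost_set_inflight_desc_split(vid, qid, desc_idx);

        /* ... process the request ... */

        /* about to complete: remember the descriptor being finished */
        rte_vhost_set_last_inflight_io_split(vid, qid, desc_idx);

        /* ... write the used ring entry and bump the used index ... */

        /* completion published: clear the inflight state */
        rte_vhost_clr_inflight_desc_split(vid, qid, new_used_idx, desc_idx);
}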
1263
1264 int
1265 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1266                                       uint16_t head)
1267 {
1268         struct rte_vhost_inflight_info_packed *inflight_info;
1269         struct virtio_net *dev;
1270         struct vhost_virtqueue *vq;
1271         uint16_t last;
1272
1273         dev = get_device(vid);
1274         if (unlikely(!dev))
1275                 return -1;
1276
1277         if (unlikely(!(dev->protocol_features &
1278             (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1279                 return 0;
1280
1281         if (unlikely(!vq_is_packed(dev)))
1282                 return -1;
1283
1284         if (unlikely(vring_idx >= VHOST_MAX_VRING))
1285                 return -1;
1286
1287         vq = dev->virtqueue[vring_idx];
1288         if (unlikely(!vq))
1289                 return -1;
1290
1291         inflight_info = vq->inflight_packed;
1292         if (unlikely(!inflight_info))
1293                 return -1;
1294
1295         if (unlikely(head >= vq->size))
1296                 return -1;
1297
1298         last = inflight_info->desc[head].last;
1299         if (unlikely(last >= vq->size))
1300                 return -1;
1301
1302         inflight_info->desc[last].next = inflight_info->free_head;
1303         inflight_info->free_head = head;
1304         inflight_info->used_idx += inflight_info->desc[head].num;
1305         if (inflight_info->used_idx >= inflight_info->desc_num) {
1306                 inflight_info->used_idx -= inflight_info->desc_num;
1307                 inflight_info->used_wrap_counter =
1308                         !inflight_info->used_wrap_counter;
1309         }
1310
1311         return 0;
1312 }
1313
1314 int
1315 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1316 {
1317         struct virtio_net *dev;
1318         struct vhost_virtqueue *vq;
1319
1320         dev = get_device(vid);
1321         if (!dev)
1322                 return -1;
1323
1324         if (vring_idx >= VHOST_MAX_VRING)
1325                 return -1;
1326
1327         vq = dev->virtqueue[vring_idx];
1328         if (!vq)
1329                 return -1;
1330
1331         rte_spinlock_lock(&vq->access_lock);
1332
1333         if (vq_is_packed(dev))
1334                 vhost_vring_call_packed(dev, vq);
1335         else
1336                 vhost_vring_call_split(dev, vq);
1337
1338         rte_spinlock_unlock(&vq->access_lock);
1339
1340         return 0;
1341 }
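/*
 * A sketch of when a backend would call the function above: after its own
 * datapath has placed completions in the used ring, it kicks the guest
 * through the call file descriptor managed by this library. The function
 * name is illustrative.
 */
static void
example_complete_and_kick(int vid, uint16_t vring_idx)
{
        /* ... used ring already updated by the caller's datapath ... */
        if (rte_vhost_vring_call(vid, vring_idx) < 0)
                VHOST_LOG_CONFIG(ERR, "failed to call vring %d of vid %d\n",
                                vring_idx, vid);
}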
1342
1343 uint16_t
1344 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1345 {
1346         struct virtio_net *dev;
1347         struct vhost_virtqueue *vq;
1348         uint16_t ret = 0;
1349
1350         dev = get_device(vid);
1351         if (!dev)
1352                 return 0;
1353
1354         if (queue_id >= VHOST_MAX_VRING)
1355                 return 0;
1356
1357         vq = dev->virtqueue[queue_id];
1358         if (!vq)
1359                 return 0;
1360
1361         rte_spinlock_lock(&vq->access_lock);
1362
1363         if (unlikely(!vq->enabled || vq->avail == NULL))
1364                 goto out;
1365
1366         ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1367
1368 out:
1369         rte_spinlock_unlock(&vq->access_lock);
1370         return ret;
1371 }
1372
1373 static inline int
1374 vhost_enable_notify_split(struct virtio_net *dev,
1375                 struct vhost_virtqueue *vq, int enable)
1376 {
1377         if (vq->used == NULL)
1378                 return -1;
1379
1380         if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1381                 if (enable)
1382                         vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1383                 else
1384                         vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1385         } else {
1386                 if (enable)
1387                         vhost_avail_event(vq) = vq->last_avail_idx;
1388         }
1389         return 0;
1390 }
1391
1392 static inline int
1393 vhost_enable_notify_packed(struct virtio_net *dev,
1394                 struct vhost_virtqueue *vq, int enable)
1395 {
1396         uint16_t flags;
1397
1398         if (vq->device_event == NULL)
1399                 return -1;
1400
1401         if (!enable) {
1402                 vq->device_event->flags = VRING_EVENT_F_DISABLE;
1403                 return 0;
1404         }
1405
1406         flags = VRING_EVENT_F_ENABLE;
1407         if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1408                 flags = VRING_EVENT_F_DESC;
1409                 vq->device_event->off_wrap = vq->last_avail_idx |
1410                         vq->avail_wrap_counter << 15;
1411         }
1412
1413         rte_atomic_thread_fence(__ATOMIC_RELEASE);
1414
1415         vq->device_event->flags = flags;
1416         return 0;
1417 }
1418
1419 int
1420 vhost_enable_guest_notification(struct virtio_net *dev,
1421                 struct vhost_virtqueue *vq, int enable)
1422 {
1423         /*
1424          * If the virtqueue is not ready yet, the setting will be applied
1425          * when the virtqueue becomes ready.
1426          */
1427         if (!vq->ready)
1428                 return 0;
1429
1430         if (vq_is_packed(dev))
1431                 return vhost_enable_notify_packed(dev, vq, enable);
1432         else
1433                 return vhost_enable_notify_split(dev, vq, enable);
1434 }
1435
1436 int
1437 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1438 {
1439         struct virtio_net *dev = get_device(vid);
1440         struct vhost_virtqueue *vq;
1441         int ret;
1442
1443         if (!dev)
1444                 return -1;
1445
1446         if (queue_id >= VHOST_MAX_VRING)
1447                 return -1;
1448
1449         vq = dev->virtqueue[queue_id];
1450         if (!vq)
1451                 return -1;
1452
1453         rte_spinlock_lock(&vq->access_lock);
1454
1455         vq->notif_enable = enable;
1456         ret = vhost_enable_guest_notification(dev, vq, enable);
1457
1458         rte_spinlock_unlock(&vq->access_lock);
1459
1460         return ret;
1461 }
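/*
 * A sketch of a common polling pattern built on the API above: suppress
 * guest notifications while the backend is busy polling the ring, and
 * re-enable them before going idle so the guest can kick it again. The
 * function name is illustrative.
 */
static void
example_poll_window(int vid, uint16_t queue_id)
{
        rte_vhost_enable_guest_notification(vid, queue_id, 0);

        /* ... poll and process the virtqueue ... */

        rte_vhost_enable_guest_notification(vid, queue_id, 1);
}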
1462
1463 void
1464 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1465 {
1466         struct virtio_net *dev = get_device(vid);
1467
1468         if (dev == NULL)
1469                 return;
1470
1471         vhost_log_write(dev, addr, len);
1472 }
1473
1474 void
1475 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1476                          uint64_t offset, uint64_t len)
1477 {
1478         struct virtio_net *dev;
1479         struct vhost_virtqueue *vq;
1480
1481         dev = get_device(vid);
1482         if (dev == NULL)
1483                 return;
1484
1485         if (vring_idx >= VHOST_MAX_VRING)
1486                 return;
1487         vq = dev->virtqueue[vring_idx];
1488         if (!vq)
1489                 return;
1490
1491         vhost_log_used_vring(dev, vq, offset, len);
1492 }
1493
1494 uint32_t
1495 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1496 {
1497         struct virtio_net *dev;
1498         struct vhost_virtqueue *vq;
1499         uint32_t ret = 0;
1500
1501         dev = get_device(vid);
1502         if (dev == NULL)
1503                 return 0;
1504
1505         if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1506                 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1507                         dev->ifname, __func__, qid);
1508                 return 0;
1509         }
1510
1511         vq = dev->virtqueue[qid];
1512         if (vq == NULL)
1513                 return 0;
1514
1515         rte_spinlock_lock(&vq->access_lock);
1516
1517         if (unlikely(!vq->enabled || vq->avail == NULL))
1518                 goto out;
1519
1520         ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1521
1522 out:
1523         rte_spinlock_unlock(&vq->access_lock);
1524         return ret;
1525 }
1526
1527 struct rte_vdpa_device *
1528 rte_vhost_get_vdpa_device(int vid)
1529 {
1530         struct virtio_net *dev = get_device(vid);
1531
1532         if (dev == NULL)
1533                 return NULL;
1534
1535         return dev->vdpa_dev;
1536 }
1537
1538 int
1539 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1540                 uint64_t *log_size)
1541 {
1542         struct virtio_net *dev = get_device(vid);
1543
1544         if (dev == NULL || log_base == NULL || log_size == NULL)
1545                 return -1;
1546
1547         *log_base = dev->log_base;
1548         *log_size = dev->log_size;
1549
1550         return 0;
1551 }
1552
1553 int
1554 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1555                 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1556 {
1557         struct vhost_virtqueue *vq;
1558         struct virtio_net *dev = get_device(vid);
1559
1560         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1561                 return -1;
1562
1563         if (queue_id >= VHOST_MAX_VRING)
1564                 return -1;
1565
1566         vq = dev->virtqueue[queue_id];
1567         if (!vq)
1568                 return -1;
1569
1570         if (vq_is_packed(dev)) {
1571                 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1572                                   vq->last_avail_idx;
1573                 *last_used_idx = (vq->used_wrap_counter << 15) |
1574                                  vq->last_used_idx;
1575         } else {
1576                 *last_avail_idx = vq->last_avail_idx;
1577                 *last_used_idx = vq->last_used_idx;
1578         }
1579
1580         return 0;
1581 }
1582
1583 int
1584 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1585                 uint16_t last_avail_idx, uint16_t last_used_idx)
1586 {
1587         struct vhost_virtqueue *vq;
1588         struct virtio_net *dev = get_device(vid);
1589
1590         if (!dev)
1591                 return -1;
1592
1593         if (queue_id >= VHOST_MAX_VRING)
1594                 return -1;
1595
1596         vq = dev->virtqueue[queue_id];
1597         if (!vq)
1598                 return -1;
1599
1600         if (vq_is_packed(dev)) {
1601                 vq->last_avail_idx = last_avail_idx & 0x7fff;
1602                 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1603                 vq->last_used_idx = last_used_idx & 0x7fff;
1604                 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1605         } else {
1606                 vq->last_avail_idx = last_avail_idx;
1607                 vq->last_used_idx = last_used_idx;
1608         }
1609
1610         return 0;
1611 }
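/*
 * A sketch of saving and restoring ring indexes with the two functions
 * above, e.g. around a backend restart or live migration. For packed rings
 * the wrap counters travel in bit 15 of each index, as encoded by
 * rte_vhost_get_vring_base()/rte_vhost_set_vring_base(). The function name
 * is illustrative.
 */
static int
example_save_restore_vring_base(int vid, uint16_t queue_id)
{
        uint16_t last_avail_idx, last_used_idx;

        if (rte_vhost_get_vring_base(vid, queue_id,
                        &last_avail_idx, &last_used_idx) < 0)
                return -1;

        /* ... persist the two indexes, tear down, come back up ... */

        return rte_vhost_set_vring_base(vid, queue_id,
                        last_avail_idx, last_used_idx);
}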
1612
1613 int
1614 rte_vhost_get_vring_base_from_inflight(int vid,
1615                                        uint16_t queue_id,
1616                                        uint16_t *last_avail_idx,
1617                                        uint16_t *last_used_idx)
1618 {
1619         struct rte_vhost_inflight_info_packed *inflight_info;
1620         struct vhost_virtqueue *vq;
1621         struct virtio_net *dev = get_device(vid);
1622
1623         if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1624                 return -1;
1625
1626         if (queue_id >= VHOST_MAX_VRING)
1627                 return -1;
1628
1629         vq = dev->virtqueue[queue_id];
1630         if (!vq)
1631                 return -1;
1632
1633         if (!vq_is_packed(dev))
1634                 return -1;
1635
1636         inflight_info = vq->inflight_packed;
1637         if (!inflight_info)
1638                 return -1;
1639
1640         *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1641                           inflight_info->old_used_idx;
1642         *last_used_idx = *last_avail_idx;
1643
1644         return 0;
1645 }
1646
1647 int
1648 rte_vhost_extern_callback_register(int vid,
1649                 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1650 {
1651         struct virtio_net *dev = get_device(vid);
1652
1653         if (dev == NULL || ops == NULL)
1654                 return -1;
1655
1656         dev->extern_ops = *ops;
1657         dev->extern_data = ctx;
1658         return 0;
1659 }
1660
1661 static __rte_always_inline int
1662 async_channel_register(int vid, uint16_t queue_id)
1663 {
1664         struct virtio_net *dev = get_device(vid);
1665         struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1666         struct vhost_async *async;
1667         int node = vq->numa_node;
1668
1669         if (unlikely(vq->async)) {
1670                 VHOST_LOG_CONFIG(ERR,
1671                                 "(%s) async register failed: already registered (qid: %d)\n",
1672                                 dev->ifname, queue_id);
1673                 return -1;
1674         }
1675
1676         async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1677         if (!async) {
1678                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1679                                 dev->ifname, queue_id);
1680                 return -1;
1681         }
1682
1683         async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1684                         RTE_CACHE_LINE_SIZE, node);
1685         if (!async->pkts_info) {
1686                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1687                                 dev->ifname, queue_id);
1688                 goto out_free_async;
1689         }
1690
1691         async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1692                         RTE_CACHE_LINE_SIZE, node);
1693         if (!async->pkts_cmpl_flag) {
1694                 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1695                                 dev->ifname, queue_id);
1696                 goto out_free_async;
1697         }
1698
1699         if (vq_is_packed(dev)) {
1700                 async->buffers_packed = rte_malloc_socket(NULL,
1701                                 vq->size * sizeof(struct vring_used_elem_packed),
1702                                 RTE_CACHE_LINE_SIZE, node);
1703                 if (!async->buffers_packed) {
1704                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1705                                         dev->ifname, queue_id);
1706                         goto out_free_inflight;
1707                 }
1708         } else {
1709                 async->descs_split = rte_malloc_socket(NULL,
1710                                 vq->size * sizeof(struct vring_used_elem),
1711                                 RTE_CACHE_LINE_SIZE, node);
1712                 if (!async->descs_split) {
1713                         VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1714                                         dev->ifname, queue_id);
1715                         goto out_free_inflight;
1716                 }
1717         }
1718
1719         vq->async = async;
1720
1721         return 0;
1722 out_free_inflight:
1723         rte_free(async->pkts_info);
1724 out_free_async:
1725         rte_free(async);
1726
1727         return -1;
1728 }
1729
1730 int
1731 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1732 {
1733         struct vhost_virtqueue *vq;
1734         struct virtio_net *dev = get_device(vid);
1735         int ret;
1736
1737         if (dev == NULL)
1738                 return -1;
1739
1740         if (queue_id >= VHOST_MAX_VRING)
1741                 return -1;
1742
1743         vq = dev->virtqueue[queue_id];
1744
1745         if (unlikely(vq == NULL || !dev->async_copy))
1746                 return -1;
1747
1748         rte_spinlock_lock(&vq->access_lock);
1749         ret = async_channel_register(vid, queue_id);
1750         rte_spinlock_unlock(&vq->access_lock);
1751
1752         return ret;
1753 }
1754
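/*
 * Lock-free variant of rte_vhost_async_channel_register(): the caller must
 * already hold the virtqueue access_lock (this is checked and the call
 * fails otherwise), typically from vhost callback functions where the lock
 * is already taken.
 */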
1755 int
1756 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1757 {
1758         struct vhost_virtqueue *vq;
1759         struct virtio_net *dev = get_device(vid);
1760
1761         if (dev == NULL)
1762                 return -1;
1763
1764         if (queue_id >= VHOST_MAX_VRING)
1765                 return -1;
1766
1767         vq = dev->virtqueue[queue_id];
1768
1769         if (unlikely(vq == NULL || !dev->async_copy))
1770                 return -1;
1771
1772         if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) {
1773                 VHOST_LOG_CONFIG(ERR, "(%s) %s() called without access lock taken.\n",
1774                                 dev->ifname, __func__);
1775                 return -1;
1776         }
1777
1778         return async_channel_register(vid, queue_id);
1779 }
1780
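/*
 * Unregister the async channel of a virtqueue.
 * The access_lock is only trylock'ed, so the call fails (-1) instead of
 * blocking when the virtqueue is busy, and it also fails while packets
 * submitted for async copy are still in flight.
 */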
1781 int
1782 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1783 {
1784         struct vhost_virtqueue *vq;
1785         struct virtio_net *dev = get_device(vid);
1786         int ret = -1;
1787
1788         if (dev == NULL)
1789                 return ret;
1790
1791         if (queue_id >= VHOST_MAX_VRING)
1792                 return ret;
1793
1794         vq = dev->virtqueue[queue_id];
1795
1796         if (vq == NULL)
1797                 return ret;
1798
1799         if (!rte_spinlock_trylock(&vq->access_lock)) {
1800                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1801                                 dev->ifname);
1802                 return ret;
1803         }
1804
1805         if (!vq->async) {
1806                 ret = 0;
1807         } else if (vq->async->pkts_inflight_n) {
1808                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1809                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1810                         dev->ifname);
1811         } else {
1812                 vhost_free_async_mem(vq);
1813                 ret = 0;
1814         }
1815
1816         rte_spinlock_unlock(&vq->access_lock);
1817
1818         return ret;
1819 }
1820
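/*
 * Lock-free variant of rte_vhost_async_channel_unregister(): the caller
 * must already hold the virtqueue access_lock. Like the locked variant,
 * it refuses to release the async metadata while packets are still
 * in flight.
 */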
1821 int
1822 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1823 {
1824         struct vhost_virtqueue *vq;
1825         struct virtio_net *dev = get_device(vid);
1826
1827         if (dev == NULL)
1828                 return -1;
1829
1830         if (queue_id >= VHOST_MAX_VRING)
1831                 return -1;
1832
1833         vq = dev->virtqueue[queue_id];
1834
1835         if (vq == NULL)
1836                 return -1;
1837
1838         if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) {
1839                 VHOST_LOG_CONFIG(ERR, "(%s) %s() called without access lock taken.\n",
1840                                 dev->ifname, __func__);
1841                 return -1;
1842         }
1843
1844         if (!vq->async)
1845                 return 0;
1846
1847         if (vq->async->pkts_inflight_n) {
1848                 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1849                 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1850                         dev->ifname);
1851                 return -1;
1852         }
1853
1854         vhost_free_async_mem(vq);
1855
1856         return 0;
1857 }
1858
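/*
 * Register a DMA device virtual channel for the vhost async data path.
 * A completion-flag ring is allocated per vChannel; its size is the
 * device's max_desc rounded up to a power of two so that a simple mask
 * can be used as ring index. Registering the same vChannel twice is a
 * no-op and returns 0.
 *
 * Illustrative usage (hypothetical setup code, assuming the dmadev has
 * already been set up with rte_dma_configure(), rte_dma_vchan_setup()
 * and rte_dma_start()):
 *
 *	if (rte_vhost_async_dma_configure(dma_id, 0) < 0)
 *		rte_exit(EXIT_FAILURE, "Failed to attach DMA %d\n", dma_id);
 */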
1859 int
1860 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1861 {
1862         struct rte_dma_info info;
1863         void *pkts_cmpl_flag_addr;
1864         uint16_t max_desc;
1865
1866         if (!rte_dma_is_valid(dma_id)) {
1867                 VHOST_LOG_CONFIG(ERR, "DMA %d is not a valid DMA device.\n", dma_id);
1868                 return -1;
1869         }
1870
1871         rte_dma_info_get(dma_id, &info);
1872         if (vchan_id >= info.max_vchans) {
1873                 VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1874                 return -1;
1875         }
1876
1877         if (!dma_copy_track[dma_id].vchans) {
1878                 struct async_dma_vchan_info *vchans;
1879
1880                 vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1881                                 RTE_CACHE_LINE_SIZE);
1882                 if (vchans == NULL) {
1883                         VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
1884                                         dma_id, vchan_id);
1885                         return -1;
1886                 }
1887
1888                 dma_copy_track[dma_id].vchans = vchans;
1889         }
1890
1891         if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1892                 VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
1893                                 vchan_id);
1894                 return 0;
1895         }
1896
1897         max_desc = info.max_desc;
1898         if (!rte_is_power_of_2(max_desc))
1899                 max_desc = rte_align32pow2(max_desc);
1900
1901         pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1902         if (!pkts_cmpl_flag_addr) {
1903                 VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d vChannel %u.\n",
1904                                 dma_id, vchan_id);
1905
1906                 if (dma_copy_track[dma_id].nr_vchans == 0) {
1907                         rte_free(dma_copy_track[dma_id].vchans);
1908                         dma_copy_track[dma_id].vchans = NULL;
1909                 }
1910                 return -1;
1911         }
1912
1913         dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1914         dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1915         dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1916         dma_copy_track[dma_id].nr_vchans++;
1917
1918         return 0;
1919 }
1920
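/*
 * Return the number of packets still in flight on the async channel of a
 * virtqueue, or -1 when the arguments are invalid, no async channel is
 * registered, or the virtqueue is busy (the access_lock is only
 * trylock'ed).
 */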
1921 int
1922 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1923 {
1924         struct vhost_virtqueue *vq;
1925         struct virtio_net *dev = get_device(vid);
1926         int ret = -1;
1927
1928         if (dev == NULL)
1929                 return ret;
1930
1931         if (queue_id >= VHOST_MAX_VRING)
1932                 return ret;
1933
1934         vq = dev->virtqueue[queue_id];
1935
1936         if (vq == NULL)
1937                 return ret;
1938
1939         if (!rte_spinlock_trylock(&vq->access_lock)) {
1940                 VHOST_LOG_CONFIG(DEBUG,
1941                         "(%s) failed to check in-flight packets, virtqueue busy.\n",
1942                         dev->ifname);
1943                 return ret;
1944         }
1945
1946         if (vq->async)
1947                 ret = vq->async->pkts_inflight_n;
1948
1949         rte_spinlock_unlock(&vq->access_lock);
1950
1951         return ret;
1952 }
1953
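/*
 * Lock-free variant of rte_vhost_async_get_inflight(): the caller must
 * already hold the virtqueue access_lock.
 */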
1954 int
1955 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
1956 {
1957         struct vhost_virtqueue *vq;
1958         struct virtio_net *dev = get_device(vid);
1959         int ret = -1;
1960
1961         if (dev == NULL)
1962                 return ret;
1963
1964         if (queue_id >= VHOST_MAX_VRING)
1965                 return ret;
1966
1967         vq = dev->virtqueue[queue_id];
1968
1969         if (vq == NULL)
1970                 return ret;
1971
1972         if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) {
1973                 VHOST_LOG_CONFIG(ERR, "(%s) %s() called without access lock taken.\n",
1974                                 dev->ifname, __func__);
1975                 return -1;
1976         }
1977
1978         if (!vq->async)
1979                 return ret;
1980
1981         ret = vq->async->pkts_inflight_n;
1982
1983         return ret;
1984 }
1985
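/*
 * Fill the power-monitor condition (address, expected value, mask, size)
 * used to wait for new work on the virtqueue: for a packed ring the flags
 * of the next descriptor are watched (the wrap counter selects whether
 * AVAIL or USED is expected), for a split ring the avail index is compared
 * against the last index processed by the backend.
 */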
1986 int
1987 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1988                 struct rte_vhost_power_monitor_cond *pmc)
1989 {
1990         struct virtio_net *dev = get_device(vid);
1991         struct vhost_virtqueue *vq;
1992
1993         if (dev == NULL)
1994                 return -1;
1995         if (queue_id >= VHOST_MAX_VRING)
1996                 return -1;
1997
1998         vq = dev->virtqueue[queue_id];
1999         if (vq == NULL)
2000                 return -1;
2001
2002         if (vq_is_packed(dev)) {
2003                 struct vring_packed_desc *desc;
2004                 desc = vq->desc_packed;
2005                 pmc->addr = &desc[vq->last_avail_idx].flags;
2006                 if (vq->avail_wrap_counter)
2007                         pmc->val = VRING_DESC_F_AVAIL;
2008                 else
2009                         pmc->val = VRING_DESC_F_USED;
2010                 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
2011                 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
2012                 pmc->match = 1;
2013         } else {
2014                 pmc->addr = &vq->avail->idx;
2015                 pmc->val = vq->last_avail_idx & (vq->size - 1);
2016                 pmc->mask = vq->size - 1;
2017                 pmc->size = sizeof(vq->avail->idx);
2018                 pmc->match = 0;
2019         }
2020
2021         return 0;
2022 }
2023
2024
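/*
 * Return the number of per-virtqueue statistics and, when the provided
 * array is large enough, fill it with names of the form "rx_qN_<stat>" /
 * "tx_qN_<stat>". Statistics must have been enabled on the port,
 * otherwise -1 is returned.
 */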
2025 int
2026 rte_vhost_vring_stats_get_names(int vid, uint16_t queue_id,
2027                 struct rte_vhost_stat_name *name, unsigned int size)
2028 {
2029         struct virtio_net *dev = get_device(vid);
2030         unsigned int i;
2031
2032         if (dev == NULL)
2033                 return -1;
2034
2035         if (queue_id >= dev->nr_vring)
2036                 return -1;
2037
2038         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2039                 return -1;
2040
2041         if (name == NULL || size < VHOST_NB_VQ_STATS)
2042                 return VHOST_NB_VQ_STATS;
2043
2044         for (i = 0; i < VHOST_NB_VQ_STATS; i++)
2045                 snprintf(name[i].name, sizeof(name[i].name), "%s_q%u_%s",
2046                                 (queue_id & 1) ? "rx" : "tx",
2047                                 queue_id / 2, vhost_vq_stat_strings[i].name);
2048
2049         return VHOST_NB_VQ_STATS;
2050 }
2051
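/*
 * Snapshot the per-virtqueue counters under the access_lock. As with the
 * names query, the required array size (VHOST_NB_VQ_STATS) is returned
 * when the caller's buffer is too small.
 */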
2052 int
2053 rte_vhost_vring_stats_get(int vid, uint16_t queue_id,
2054                 struct rte_vhost_stat *stats, unsigned int n)
2055 {
2056         struct virtio_net *dev = get_device(vid);
2057         struct vhost_virtqueue *vq;
2058         unsigned int i;
2059
2060         if (dev == NULL)
2061                 return -1;
2062
2063         if (queue_id >= dev->nr_vring)
2064                 return -1;
2065
2066         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2067                 return -1;
2068
2069         if (stats == NULL || n < VHOST_NB_VQ_STATS)
2070                 return VHOST_NB_VQ_STATS;
2071
2072         vq = dev->virtqueue[queue_id];
2073
2074         rte_spinlock_lock(&vq->access_lock);
2075         for (i = 0; i < VHOST_NB_VQ_STATS; i++) {
2076                 stats[i].value =
2077                         *(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset);
2078                 stats[i].id = i;
2079         }
2080         rte_spinlock_unlock(&vq->access_lock);
2081
2082         return VHOST_NB_VQ_STATS;
2083 }
2084
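/*
 * Reset all counters of a virtqueue to zero, under the access_lock.
 */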
2085 int
rte_vhost_vring_stats_reset(int vid, uint16_t queue_id)
2086 {
2087         struct virtio_net *dev = get_device(vid);
2088         struct vhost_virtqueue *vq;
2089
2090         if (dev == NULL)
2091                 return -1;
2092
2093         if (queue_id >= dev->nr_vring)
2094                 return -1;
2095
2096         if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2097                 return -1;
2098
2099         vq = dev->virtqueue[queue_id];
2100
2101         rte_spinlock_lock(&vq->access_lock);
2102         memset(&vq->stats, 0, sizeof(vq->stats));
2103         rte_spinlock_unlock(&vq->access_lock);
2104
2105         return 0;
2106 }
2107
2108 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
2109 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);