a891c56771f57769501725f53c323738a4221eb4
[dpdk.git] / examples / vhost_xen / vhost_monitor.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <dirent.h>
37 #include <unistd.h>
38 #include <sys/eventfd.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <xen/xen-compat.h>
42 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
43 #include <xs.h>
44 #else
45 #include <xenstore.h>
46 #endif
47 #include <linux/virtio_ring.h>
48 #include <linux/virtio_pci.h>
49 #include <linux/virtio_net.h>
50
51 #include <rte_ethdev.h>
52 #include <rte_log.h>
53 #include <rte_malloc.h>
54 #include <rte_string_fns.h>
55
56 #include "virtio-net.h"
57 #include "xen_vhost.h"
58
/*
 * State of the xenstore watch used by the monitor: the daemon connection
 * and the file descriptor associated with it.
 */
struct virtio_watch {
	struct xs_handle *xs;	/* connection obtained from xs_daemon_open() */
	int watch_fd;		/* descriptor returned by xs_fileno() for xs */
};
63
64
65 /* device ops to add/remove device to/from data core. */
66 static struct virtio_net_device_ops const *notify_ops;
67
68 /* root address of the linked list in the configuration core. */
69 static struct virtio_net_config_ll *ll_root = NULL;
70
71 /* root address of VM. */
72 static struct xen_guestlist guest_root;
73
74 static struct virtio_watch watch;
75
76 static void
77 vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p,
78         unsigned long align)
79 {
80         vq->size = num;
81         vq->desc = (struct vring_desc *) p;
82         vq->avail = (struct vring_avail *) (p +
83                 num * sizeof(struct vring_desc));
84         vq->used = (void *)
85                 RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align);
86
87 }
88
89 static int
90 init_watch(void)
91 {
92         struct xs_handle *xs;
93         int ret;
94         int fd;
95
96         /* get a connection to the daemon */
97         xs = xs_daemon_open();
98         if (xs == NULL) {
99                 RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n");
100                 return (-1);
101         }
102
103         ret = xs_watch(xs, "/local/domain", "mytoken");
104         if (ret == 0) {
105                 RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__);
106                 xs_daemon_close(xs);
107                 return (-1);
108         }
109
110         /* We are notified of read availability on the watch via the file descriptor. */
111         fd = xs_fileno(xs);
112         watch.xs = xs;
113         watch.watch_fd = fd;
114
115         TAILQ_INIT(&guest_root);
116         return 0;
117 }
118
119 static struct xen_guest *
120 get_xen_guest(int dom_id)
121 {
122         struct xen_guest *guest = NULL;
123
124         TAILQ_FOREACH(guest, &guest_root, next) {
125                 if(guest->dom_id == dom_id)
126                         return guest;
127         }
128
129         return (NULL);
130 }
131
132
133 static struct xen_guest *
134 add_xen_guest(int32_t dom_id)
135 {
136         struct xen_guest *guest = NULL;
137
138         if ((guest = get_xen_guest(dom_id)) != NULL)
139                 return guest;
140
141         guest = calloc(1, sizeof(struct xen_guest));
142         if (guest) {
143                 RTE_LOG(ERR, XENHOST, "  %s: return newly created guest with %d rings\n", __func__, guest->vring_num);
144                 TAILQ_INSERT_TAIL(&guest_root, guest, next);
145                 guest->dom_id = dom_id;
146         }
147
148         return guest;
149 }
150
151 static void
152 cleanup_device(struct virtio_net_config_ll *ll_dev)
153 {
154         if (ll_dev == NULL)
155                 return;
156         if (ll_dev->dev.virtqueue_rx) {
157                 rte_free(ll_dev->dev.virtqueue_rx);
158                 ll_dev->dev.virtqueue_rx = NULL;
159         }
160         if (ll_dev->dev.virtqueue_tx) {
161                 rte_free(ll_dev->dev.virtqueue_tx);
162                 ll_dev->dev.virtqueue_tx = NULL;
163         }
164         free(ll_dev);
165 }
166
167 /*
168  * Add entry containing a device to the device configuration linked list.
169  */
170 static void
171 add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
172 {
173         struct virtio_net_config_ll *ll_dev = ll_root;
174
175         /* If ll_dev == NULL then this is the first device so go to else */
176         if (ll_dev) {
177                 /* If the 1st device_id != 0 then we insert our device here. */
178                 if (ll_dev->dev.device_fh != 0) {
179                         new_ll_dev->dev.device_fh = 0;
180                         new_ll_dev->next = ll_dev;
181                         ll_root = new_ll_dev;
182                 } else {
183                         /* increment through the ll until we find un unused device_id,
184                          * insert the device at that entry
185                          */
186                         while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
187                                 ll_dev = ll_dev->next;
188
189                         new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
190                         new_ll_dev->next = ll_dev->next;
191                         ll_dev->next = new_ll_dev;
192                 }
193         } else {
194                 ll_root = new_ll_dev;
195                 ll_root->dev.device_fh = 0;
196         }
197 }
198
199
200 /*
201  * Remove an entry from the device configuration linked list.
202  */
203 static struct virtio_net_config_ll *
204 rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
205 {
206         /* First remove the device and then clean it up. */
207         if (ll_dev == ll_root) {
208                 ll_root = ll_dev->next;
209                 cleanup_device(ll_dev);
210                 return ll_root;
211         } else {
212                 ll_dev_last->next = ll_dev->next;
213                 cleanup_device(ll_dev);
214                 return ll_dev_last->next;
215         }
216 }
217
218 /*
219  * Retrieves an entry from the devices configuration linked list.
220  */
221 static struct virtio_net_config_ll *
222 get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id)
223 {
224         struct virtio_net_config_ll *ll_dev = ll_root;
225
226         /* Loop through linked list until the dom_id is found. */
227         while (ll_dev != NULL) {
228                 if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx)
229                         return ll_dev;
230                 ll_dev = ll_dev->next;
231         }
232
233         return NULL;
234 }
235
/*
 * Hook for initialising a freshly allocated device structure. It currently
 * performs no work; the argument is only marked as used.
 */
static void
init_dev(struct virtio_net *dev)
{
	RTE_SET_USED(dev);
}
244
245
246 static struct
247 virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest)
248 {
249         struct virtio_net_config_ll *new_ll_dev;
250         struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
251         size_t size, vq_ring_size, vq_size = VQ_DESC_NUM;
252         void *vq_ring_virt_mem;
253         uint64_t gpa;
254         uint32_t i;
255
256         /* Setup device and virtqueues. */
257         new_ll_dev   = calloc(1, sizeof(struct virtio_net_config_ll));
258         virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
259         virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
260         if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL)
261                 goto err;
262
263         new_ll_dev->dev.virtqueue_rx = virtqueue_rx;
264         new_ll_dev->dev.virtqueue_tx = virtqueue_tx;
265         new_ll_dev->dev.dom_id       = guest->dom_id;
266         new_ll_dev->dev.virtio_idx   = virtio_idx;
267         /* Initialise device and virtqueues. */
268         init_dev(&new_ll_dev->dev);
269
270         size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
271         vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
272         (void)vq_ring_size;
273
274         vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr;
275         vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
276         virtqueue_rx->size = vq_size;
277         virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr);
278
279         vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr;
280         vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
281         virtqueue_tx->size = vq_size;
282         memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr));
283
284         /* virtio_memory has to be one per domid */
285         new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL);
286         new_ll_dev->dev.mem->nregions = guest->pool_num;
287         for (i = 0; i < guest->pool_num; i++) {
288                 gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address = (uint64_t)guest->mempool[i].gva;
289                 new_ll_dev->dev.mem->regions[i].guest_phys_address_end = gpa + guest->mempool[i].mempfn_num * getpagesize();
290                 new_ll_dev->dev.mem->regions[i].address_offset = (uint64_t)guest->mempool[i].hva - gpa;
291         }
292
293         new_ll_dev->next = NULL;
294
295         /* Add entry to device configuration linked list. */
296         add_config_ll_entry(new_ll_dev);
297         return new_ll_dev;
298 err:
299         if (new_ll_dev)
300                 free(new_ll_dev);
301         rte_free(virtqueue_rx);
302         rte_free(virtqueue_tx);
303
304         return NULL;
305 }
306
307 static void
308 destroy_guest(struct xen_guest *guest)
309 {
310         uint32_t i;
311
312         for (i = 0; i < guest->vring_num; i++)
313                 cleanup_vring(&guest->vring[i]);
314         /* clean mempool */
315         for (i = 0; i < guest->pool_num; i++)
316                 cleanup_mempool(&guest->mempool[i]);
317         free(guest);
318
319         return;
320 }
321
322 /*
323  * This function will cleanup the device and remove it from device configuration linked list.
324  */
325 static void
326 destroy_device(unsigned int virtio_idx, unsigned int dom_id)
327 {
328         struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
329         struct virtio_net_config_ll *ll_dev_cur = ll_root;
330
331         /* clean virtio device */
332         struct xen_guest *guest = NULL;
333         guest = get_xen_guest(dom_id);
334         if (guest == NULL)
335                 return;
336
337         /* Find the linked list entry for the device to be removed. */
338         ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id);
339         while (ll_dev_cur != NULL) {
340                 /* If the device is found or a device that doesn't exist is found then it is removed. */
341                 if  (ll_dev_cur == ll_dev_cur_ctx) {
342                         if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING))
343                                 notify_ops->destroy_device(&(ll_dev_cur->dev));
344                         ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last);
345                 } else {
346                         ll_dev_last = ll_dev_cur;
347                         ll_dev_cur = ll_dev_cur->next;
348                 }
349         }
350         RTE_LOG(INFO, XENHOST, "  %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n",
351                 __func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag);
352         cleanup_vring(&guest->vring[virtio_idx]);
353         guest->vring[virtio_idx].removed = 1;
354         guest->vring_num -= 1;
355 }
356
357
358
359
360 static void
361 watch_unmap_event(void)
362 {
363         int i;
364         struct xen_guest *guest  = NULL;
365         bool remove_request;
366
367         TAILQ_FOREACH(guest, &guest_root, next) {
368                 for (i = 0; i < MAX_VIRTIO; i++) {
369                         if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) {
370                                 RTE_LOG(INFO, XENHOST, "\n\n");
371                                 RTE_LOG(INFO, XENHOST, "  #####%s:  (%d, %d) to be removed\n",
372                                         __func__,
373                                         guest->vring[i].dom_id,
374                                         i);
375                                 destroy_device(i, guest->dom_id);
376                                 RTE_LOG(INFO, XENHOST, "  %s: DOM %u, vring num: %d\n",
377                                         __func__,
378                                         guest->dom_id,
379                                         guest->vring_num);
380                         }
381                 }
382         }
383
384 _find_next_remove:
385         guest = NULL;
386         remove_request = false;
387         TAILQ_FOREACH(guest, &guest_root, next) {
388                 if (guest->vring_num == 0) {
389                         remove_request = true;
390                         break;
391                 }
392         }
393         if (remove_request == true) {
394                 TAILQ_REMOVE(&guest_root, guest, next);
395                 RTE_LOG(INFO, XENHOST, "  #####%s: destroy guest (%d)\n", __func__, guest->dom_id);
396                 destroy_guest(guest);
397                 goto _find_next_remove;
398         }
399         return;
400 }
401
402 /*
403  * OK, if the guest starts first, it is ok.
404  * if host starts first, it is ok.
405  * if guest starts, and has run for sometime, and host stops and restarts,
406  * then last_used_idx  0? how to solve this. */
407
408 static void virtio_init(void)
409 {
410         uint32_t len, e_num;
411         uint32_t i,j;
412         char **dom;
413         char *status;
414         int dom_id;
415         char path[PATH_MAX];
416         char node[PATH_MAX];
417         xs_transaction_t th;
418         struct xen_guest *guest;
419         struct virtio_net_config_ll *net_config;
420         char *end;
421         int val;
422
423         /* init env for watch the node */
424         if (init_watch() < 0)
425                 return;
426
427         dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num);
428
429         for (i = 0; i < e_num; i++) {
430                 errno = 0;
431                 dom_id = strtol(dom[i], &end, 0);
432                 if (errno != 0 || end == NULL || dom_id == 0)
433                         continue;
434
435                 for (j = 0; j < RTE_MAX_ETHPORTS; j++) {
436                         snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j);
437                         snprintf(path, PATH_MAX, XEN_VM_NODE_FMT,
438                                         dom_id, node);
439
440                         th = xs_transaction_start(watch.xs);
441                         status = xs_read(watch.xs, th, path, &len);
442                         xs_transaction_end(watch.xs, th, false);
443
444                         if (status == NULL)
445                                 break;
446
447                         /* if there's any valid virtio device */
448                         errno = 0;
449                         val = strtol(status, &end, 0);
450                         if (errno != 0 || end == NULL || dom_id == 0)
451                                 val = 0;
452                         if (val == 1) {
453                                 guest = add_xen_guest(dom_id);
454                                 if (guest == NULL)
455                                         continue;
456                                 RTE_LOG(INFO, XENHOST, "  there's a new virtio existed, new a virtio device\n\n");
457
458                                 RTE_LOG(INFO, XENHOST, "  parse_vringnode dom_id %d virtioidx %d\n",dom_id,j);
459                                 if (parse_vringnode(guest, j)) {
460                                         RTE_LOG(ERR, XENHOST, "  there is invalid information in xenstore\n");
461                                         TAILQ_REMOVE(&guest_root, guest, next);
462                                         destroy_guest(guest);
463
464                                         continue;
465                                 }
466
467                                 /*if pool_num > 0, then mempool has already been parsed*/
468                                 if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
469                                         RTE_LOG(ERR, XENHOST, "  there is error information in xenstore\n");
470                                         TAILQ_REMOVE(&guest_root, guest, next);
471                                         destroy_guest(guest);
472                                         continue;
473                                 }
474
475                                 net_config = new_device(j, guest);
476                                 /* every thing is ready now, added into data core */
477                                 notify_ops->new_device(&net_config->dev);
478                         }
479                 }
480         }
481
482         free(dom);
483         return;
484 }
485
486 void
487 virtio_monitor_loop(void)
488 {
489         char **vec;
490         xs_transaction_t th;
491         char *buf;
492         unsigned int len;
493         unsigned int dom_id;
494         uint32_t virtio_idx;
495         struct xen_guest *guest;
496         struct virtio_net_config_ll *net_config;
497         enum fieldnames {
498                 FLD_NULL = 0,
499                 FLD_LOCAL,
500                 FLD_DOMAIN,
501                 FLD_ID,
502                 FLD_CONTROL,
503                 FLD_DPDK,
504                 FLD_NODE,
505                 _NUM_FLD
506         };
507         char *str_fld[_NUM_FLD];
508         char *str;
509         char *end;
510
511         virtio_init();
512         while (1) {
513                 watch_unmap_event();
514
515                 usleep(50);
516                 vec = xs_check_watch(watch.xs);
517
518                 if (vec == NULL)
519                         continue;
520
521                 th = xs_transaction_start(watch.xs);
522
523                 buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len);
524                 xs_transaction_end(watch.xs, th, false);
525
526                 if (buf) {
527                         /* theres' some node for vhost existed */
528                         if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX),
529                                                 str_fld, _NUM_FLD, '/') == _NUM_FLD) {
530                                 if (strstr(str_fld[FLD_NODE], VIRTIO_START)) {
531                                         errno = 0;
532                                         str = str_fld[FLD_ID];
533                                         dom_id = strtoul(str, &end, 0);
534                                         if (errno != 0 || end == NULL || end == str ) {
535                                                 RTE_LOG(INFO, XENHOST, "invalid domain id\n");
536                                                 continue;
537                                         }
538
539                                         errno = 0;
540                                         str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1;
541                                         virtio_idx = strtoul(str, &end, 0);
542                                         if (errno != 0 || end == NULL || end == str
543                                                         || virtio_idx > MAX_VIRTIO) {
544                                                 RTE_LOG(INFO, XENHOST, "invalid virtio idx\n");
545                                                 continue;
546                                         }
547                                         RTE_LOG(INFO, XENHOST, "  #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx);
548
549                                         guest = add_xen_guest(dom_id);
550                                         if (guest == NULL)
551                                                 continue;
552                                         guest->dom_id = dom_id;
553                                         if (parse_vringnode(guest, virtio_idx)) {
554                                                 RTE_LOG(ERR, XENHOST, "  there is invalid information in xenstore\n");
555                                                 /*guest newly created? guest existed ?*/
556                                                 TAILQ_REMOVE(&guest_root, guest, next);
557                                                 destroy_guest(guest);
558                                                 continue;
559                                         }
560                                         /*if pool_num > 0, then mempool has already been parsed*/
561                                         if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
562                                                 RTE_LOG(ERR, XENHOST, "  there is error information in xenstore\n");
563                                                 TAILQ_REMOVE(&guest_root, guest, next);
564                                                 destroy_guest(guest);
565                                                 continue;
566                                         }
567
568
569                                         net_config = new_device(virtio_idx, guest);
570                                         RTE_LOG(INFO, XENHOST, "  Add to dataplane core\n");
571                                         notify_ops->new_device(&net_config->dev);
572
573                                 }
574                         }
575                 }
576
577                 free(vec);
578         }
579         return;
580 }
581
582 /*
583  * Register ops so that we can add/remove device to data core.
584  */
585 int
586 init_virtio_xen(struct virtio_net_device_ops const *const ops)
587 {
588         notify_ops = ops;
589         if (xenhost_init())
590                 return -1;
591         return 0;
592 }