doc: announce change to make DPDK IOVA aware
[dpdk.git] / examples / vhost_xen / vhost_monitor.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <dirent.h>
37 #include <unistd.h>
38 #include <sys/eventfd.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <xen/xen-compat.h>
42 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
43 #include <xs.h>
44 #else
45 #include <xenstore.h>
46 #endif
47 #include <linux/virtio_ring.h>
48 #include <linux/virtio_pci.h>
49 #include <linux/virtio_net.h>
50
51 #include <rte_ethdev.h>
52 #include <rte_log.h>
53 #include <rte_malloc.h>
54 #include <rte_string_fns.h>
55
56 #include "virtio-net.h"
57 #include "xen_vhost.h"
58
/* State of the xenstore watch used by the monitor. */
struct virtio_watch {
	struct xs_handle *xs;	/* connection returned by xs_daemon_open() */
	int watch_fd;		/* descriptor returned by xs_fileno() for that connection */
};
63
64
65 /* device ops to add/remove device to/from data core. */
66 static struct virtio_net_device_ops const *notify_ops;
67
68 /* root address of the linked list in the configuration core. */
69 static struct virtio_net_config_ll *ll_root = NULL;
70
71 /* root address of VM. */
72 static struct xen_guestlist guest_root;
73
74 static struct virtio_watch watch;
75
76 static void
77 vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p,
78         unsigned long align)
79 {
80         vq->size = num;
81         vq->desc = (struct vring_desc *) p;
82         vq->avail = (struct vring_avail *) (p +
83                 num * sizeof(struct vring_desc));
84         vq->used = (void *)
85                 RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align);
86
87 }
88
89 static int
90 init_watch(void)
91 {
92         struct xs_handle *xs;
93         int ret;
94         int fd;
95
96         /* get a connection to the daemon */
97         xs = xs_daemon_open();
98         if (xs == NULL) {
99                 RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n");
100                 return -1;
101         }
102
103         ret = xs_watch(xs, "/local/domain", "mytoken");
104         if (ret == 0) {
105                 RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__);
106                 xs_daemon_close(xs);
107                 return -1;
108         }
109
110         /* We are notified of read availability on the watch via the file descriptor. */
111         fd = xs_fileno(xs);
112         watch.xs = xs;
113         watch.watch_fd = fd;
114
115         TAILQ_INIT(&guest_root);
116         return 0;
117 }
118
119 static struct xen_guest *
120 get_xen_guest(int dom_id)
121 {
122         struct xen_guest *guest = NULL;
123
124         TAILQ_FOREACH(guest, &guest_root, next) {
125                 if(guest->dom_id == dom_id)
126                         return guest;
127         }
128
129         return NULL;
130 }
131
132
133 static struct xen_guest *
134 add_xen_guest(int32_t dom_id)
135 {
136         struct xen_guest *guest = NULL;
137
138         if ((guest = get_xen_guest(dom_id)) != NULL)
139                 return guest;
140
141         guest = calloc(1, sizeof(struct xen_guest));
142         if (guest) {
143                 RTE_LOG(ERR, XENHOST, "  %s: return newly created guest with %d rings\n", __func__, guest->vring_num);
144                 TAILQ_INSERT_TAIL(&guest_root, guest, next);
145                 guest->dom_id = dom_id;
146         }
147
148         return guest;
149 }
150
151 static void
152 cleanup_device(struct virtio_net_config_ll *ll_dev)
153 {
154         if (ll_dev == NULL)
155                 return;
156         if (ll_dev->dev.virtqueue_rx) {
157                 rte_free(ll_dev->dev.virtqueue_rx);
158                 ll_dev->dev.virtqueue_rx = NULL;
159         }
160         if (ll_dev->dev.virtqueue_tx) {
161                 rte_free(ll_dev->dev.virtqueue_tx);
162                 ll_dev->dev.virtqueue_tx = NULL;
163         }
164         free(ll_dev);
165 }
166
167 /*
168  * Add entry containing a device to the device configuration linked list.
169  */
170 static void
171 add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
172 {
173         struct virtio_net_config_ll *ll_dev = ll_root;
174
175         /* If ll_dev == NULL then this is the first device so go to else */
176         if (ll_dev) {
177                 /* If the 1st device_id != 0 then we insert our device here. */
178                 if (ll_dev->dev.device_fh != 0) {
179                         new_ll_dev->dev.device_fh = 0;
180                         new_ll_dev->next = ll_dev;
181                         ll_root = new_ll_dev;
182                 } else {
183                         /* increment through the ll until we find un unused device_id,
184                          * insert the device at that entry
185                          */
186                         while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
187                                 ll_dev = ll_dev->next;
188
189                         new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
190                         new_ll_dev->next = ll_dev->next;
191                         ll_dev->next = new_ll_dev;
192                 }
193         } else {
194                 ll_root = new_ll_dev;
195                 ll_root->dev.device_fh = 0;
196         }
197 }
198
199
200 /*
201  * Remove an entry from the device configuration linked list.
202  */
203 static struct virtio_net_config_ll *
204 rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
205 {
206         /* First remove the device and then clean it up. */
207         if (ll_dev == ll_root) {
208                 ll_root = ll_dev->next;
209                 cleanup_device(ll_dev);
210                 return ll_root;
211         } else {
212                 ll_dev_last->next = ll_dev->next;
213                 cleanup_device(ll_dev);
214                 return ll_dev_last->next;
215         }
216 }
217
218 /*
219  * Retrieves an entry from the devices configuration linked list.
220  */
221 static struct virtio_net_config_ll *
222 get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id)
223 {
224         struct virtio_net_config_ll *ll_dev = ll_root;
225
226         /* Loop through linked list until the dom_id is found. */
227         while (ll_dev != NULL) {
228                 if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx)
229                         return ll_dev;
230                 ll_dev = ll_dev->next;
231         }
232
233         return NULL;
234 }
235
/*
 * Placeholder for device structure initialisation; currently does nothing
 * with dev.
 */
static void
init_dev(struct virtio_net *dev)
{
	(void)(dev);
}
244
245
246 static struct
247 virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest)
248 {
249         struct virtio_net_config_ll *new_ll_dev;
250         struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
251         size_t size, vq_ring_size, vq_size = VQ_DESC_NUM;
252         void *vq_ring_virt_mem;
253         uint64_t gpa;
254         uint32_t i;
255
256         /* Setup device and virtqueues. */
257         new_ll_dev   = calloc(1, sizeof(struct virtio_net_config_ll));
258         virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
259         virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
260         if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL)
261                 goto err;
262
263         new_ll_dev->dev.virtqueue_rx = virtqueue_rx;
264         new_ll_dev->dev.virtqueue_tx = virtqueue_tx;
265         new_ll_dev->dev.dom_id       = guest->dom_id;
266         new_ll_dev->dev.virtio_idx   = virtio_idx;
267         /* Initialise device and virtqueues. */
268         init_dev(&new_ll_dev->dev);
269
270         size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
271         vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
272         (void)vq_ring_size;
273
274         vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr;
275         vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
276         virtqueue_rx->size = vq_size;
277         virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr);
278
279         vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr;
280         vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
281         virtqueue_tx->size = vq_size;
282         memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr));
283
284         /* virtio_memory has to be one per domid */
285         new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL);
286         new_ll_dev->dev.mem->nregions = guest->pool_num;
287         for (i = 0; i < guest->pool_num; i++) {
288                 gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address =
289                                 (uint64_t)((uintptr_t)guest->mempool[i].gva);
290                 new_ll_dev->dev.mem->regions[i].guest_phys_address_end =
291                                 gpa + guest->mempool[i].mempfn_num * getpagesize();
292                 new_ll_dev->dev.mem->regions[i].address_offset =
293                                 (uint64_t)((uintptr_t)guest->mempool[i].hva -
294                                         (uintptr_t)gpa);
295         }
296
297         new_ll_dev->next = NULL;
298
299         /* Add entry to device configuration linked list. */
300         add_config_ll_entry(new_ll_dev);
301         return new_ll_dev;
302 err:
303         free(new_ll_dev);
304         rte_free(virtqueue_rx);
305         rte_free(virtqueue_tx);
306
307         return NULL;
308 }
309
310 static void
311 destroy_guest(struct xen_guest *guest)
312 {
313         uint32_t i;
314
315         for (i = 0; i < guest->vring_num; i++)
316                 cleanup_vring(&guest->vring[i]);
317         /* clean mempool */
318         for (i = 0; i < guest->pool_num; i++)
319                 cleanup_mempool(&guest->mempool[i]);
320         free(guest);
321
322         return;
323 }
324
325 /*
326  * This function will cleanup the device and remove it from device configuration linked list.
327  */
328 static void
329 destroy_device(unsigned int virtio_idx, unsigned int dom_id)
330 {
331         struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
332         struct virtio_net_config_ll *ll_dev_cur = ll_root;
333
334         /* clean virtio device */
335         struct xen_guest *guest = NULL;
336         guest = get_xen_guest(dom_id);
337         if (guest == NULL)
338                 return;
339
340         /* Find the linked list entry for the device to be removed. */
341         ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id);
342         while (ll_dev_cur != NULL) {
343                 /* If the device is found or a device that doesn't exist is found then it is removed. */
344                 if  (ll_dev_cur == ll_dev_cur_ctx) {
345                         if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING))
346                                 notify_ops->destroy_device(&(ll_dev_cur->dev));
347                         ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last);
348                 } else {
349                         ll_dev_last = ll_dev_cur;
350                         ll_dev_cur = ll_dev_cur->next;
351                 }
352         }
353         RTE_LOG(INFO, XENHOST, "  %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n",
354                 __func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag);
355         cleanup_vring(&guest->vring[virtio_idx]);
356         guest->vring[virtio_idx].removed = 1;
357         guest->vring_num -= 1;
358 }
359
360
361
362
363 static void
364 watch_unmap_event(void)
365 {
366         int i;
367         struct xen_guest *guest  = NULL;
368         bool remove_request;
369
370         TAILQ_FOREACH(guest, &guest_root, next) {
371                 for (i = 0; i < MAX_VIRTIO; i++) {
372                         if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) {
373                                 RTE_LOG(INFO, XENHOST, "\n\n");
374                                 RTE_LOG(INFO, XENHOST, "  #####%s:  (%d, %d) to be removed\n",
375                                         __func__,
376                                         guest->vring[i].dom_id,
377                                         i);
378                                 destroy_device(i, guest->dom_id);
379                                 RTE_LOG(INFO, XENHOST, "  %s: DOM %u, vring num: %d\n",
380                                         __func__,
381                                         guest->dom_id,
382                                         guest->vring_num);
383                         }
384                 }
385         }
386
387 _find_next_remove:
388         guest = NULL;
389         remove_request = false;
390         TAILQ_FOREACH(guest, &guest_root, next) {
391                 if (guest->vring_num == 0) {
392                         remove_request = true;
393                         break;
394                 }
395         }
396         if (remove_request == true) {
397                 TAILQ_REMOVE(&guest_root, guest, next);
398                 RTE_LOG(INFO, XENHOST, "  #####%s: destroy guest (%d)\n", __func__, guest->dom_id);
399                 destroy_guest(guest);
400                 goto _find_next_remove;
401         }
402         return;
403 }
404
/*
 * Start-up ordering: the guest starting first is fine, and so is the host
 * starting first.
 * Open question: if the guest has been running for some time and the host
 * stops and restarts, is last_used_idx back at 0? How should that case be
 * handled?
 */
410
411 static void virtio_init(void)
412 {
413         uint32_t len, e_num;
414         uint32_t i,j;
415         char **dom;
416         char *status;
417         int dom_id;
418         char path[PATH_MAX];
419         char node[PATH_MAX];
420         xs_transaction_t th;
421         struct xen_guest *guest;
422         struct virtio_net_config_ll *net_config;
423         char *end;
424         int val;
425
426         /* init env for watch the node */
427         if (init_watch() < 0)
428                 return;
429
430         dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num);
431
432         for (i = 0; i < e_num; i++) {
433                 errno = 0;
434                 dom_id = strtol(dom[i], &end, 0);
435                 if (errno != 0 || end == NULL || dom_id == 0)
436                         continue;
437
438                 for (j = 0; j < RTE_MAX_ETHPORTS; j++) {
439                         snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j);
440                         snprintf(path, PATH_MAX, XEN_VM_NODE_FMT,
441                                         dom_id, node);
442
443                         th = xs_transaction_start(watch.xs);
444                         status = xs_read(watch.xs, th, path, &len);
445                         xs_transaction_end(watch.xs, th, false);
446
447                         if (status == NULL)
448                                 break;
449
450                         /* if there's any valid virtio device */
451                         errno = 0;
452                         val = strtol(status, &end, 0);
453                         if (errno != 0 || end == NULL || dom_id == 0)
454                                 val = 0;
455                         if (val == 1) {
456                                 guest = add_xen_guest(dom_id);
457                                 if (guest == NULL)
458                                         continue;
459                                 RTE_LOG(INFO, XENHOST, "  there's a new virtio existed, new a virtio device\n\n");
460
461                                 RTE_LOG(INFO, XENHOST, "  parse_vringnode dom_id %d virtioidx %d\n",dom_id,j);
462                                 if (parse_vringnode(guest, j)) {
463                                         RTE_LOG(ERR, XENHOST, "  there is invalid information in xenstore\n");
464                                         TAILQ_REMOVE(&guest_root, guest, next);
465                                         destroy_guest(guest);
466
467                                         continue;
468                                 }
469
470                                 /*if pool_num > 0, then mempool has already been parsed*/
471                                 if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
472                                         RTE_LOG(ERR, XENHOST, "  there is error information in xenstore\n");
473                                         TAILQ_REMOVE(&guest_root, guest, next);
474                                         destroy_guest(guest);
475                                         continue;
476                                 }
477
478                                 net_config = new_device(j, guest);
479                                 /* every thing is ready now, added into data core */
480                                 notify_ops->new_device(&net_config->dev);
481                         }
482                 }
483         }
484
485         free(dom);
486         return;
487 }
488
489 void
490 virtio_monitor_loop(void)
491 {
492         char **vec;
493         xs_transaction_t th;
494         char *buf;
495         unsigned int len;
496         unsigned int dom_id;
497         uint32_t virtio_idx;
498         struct xen_guest *guest;
499         struct virtio_net_config_ll *net_config;
500         enum fieldnames {
501                 FLD_NULL = 0,
502                 FLD_LOCAL,
503                 FLD_DOMAIN,
504                 FLD_ID,
505                 FLD_CONTROL,
506                 FLD_DPDK,
507                 FLD_NODE,
508                 _NUM_FLD
509         };
510         char *str_fld[_NUM_FLD];
511         char *str;
512         char *end;
513
514         virtio_init();
515         while (1) {
516                 watch_unmap_event();
517
518                 usleep(50);
519                 vec = xs_check_watch(watch.xs);
520
521                 if (vec == NULL)
522                         continue;
523
524                 th = xs_transaction_start(watch.xs);
525
526                 buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len);
527                 xs_transaction_end(watch.xs, th, false);
528
529                 if (buf) {
530                         /* theres' some node for vhost existed */
531                         if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX),
532                                                 str_fld, _NUM_FLD, '/') == _NUM_FLD) {
533                                 if (strstr(str_fld[FLD_NODE], VIRTIO_START)) {
534                                         errno = 0;
535                                         str = str_fld[FLD_ID];
536                                         dom_id = strtoul(str, &end, 0);
537                                         if (errno != 0 || end == NULL || end == str ) {
538                                                 RTE_LOG(INFO, XENHOST, "invalid domain id\n");
539                                                 continue;
540                                         }
541
542                                         errno = 0;
543                                         str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1;
544                                         virtio_idx = strtoul(str, &end, 0);
545                                         if (errno != 0 || end == NULL || end == str
546                                                         || virtio_idx > MAX_VIRTIO) {
547                                                 RTE_LOG(INFO, XENHOST, "invalid virtio idx\n");
548                                                 continue;
549                                         }
550                                         RTE_LOG(INFO, XENHOST, "  #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx);
551
552                                         guest = add_xen_guest(dom_id);
553                                         if (guest == NULL)
554                                                 continue;
555                                         guest->dom_id = dom_id;
556                                         if (parse_vringnode(guest, virtio_idx)) {
557                                                 RTE_LOG(ERR, XENHOST, "  there is invalid information in xenstore\n");
558                                                 /*guest newly created? guest existed ?*/
559                                                 TAILQ_REMOVE(&guest_root, guest, next);
560                                                 destroy_guest(guest);
561                                                 continue;
562                                         }
563                                         /*if pool_num > 0, then mempool has already been parsed*/
564                                         if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
565                                                 RTE_LOG(ERR, XENHOST, "  there is error information in xenstore\n");
566                                                 TAILQ_REMOVE(&guest_root, guest, next);
567                                                 destroy_guest(guest);
568                                                 continue;
569                                         }
570
571
572                                         net_config = new_device(virtio_idx, guest);
573                                         RTE_LOG(INFO, XENHOST, "  Add to dataplane core\n");
574                                         notify_ops->new_device(&net_config->dev);
575
576                                 }
577                         }
578                 }
579
580                 free(vec);
581         }
582         return;
583 }
584
585 /*
586  * Register ops so that we can add/remove device to data core.
587  */
588 int
589 init_virtio_xen(struct virtio_net_device_ops const *const ops)
590 {
591         notify_ops = ops;
592         if (xenhost_init())
593                 return -1;
594         return 0;
595 }