4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <sys/ioctl.h>
41 #include <xen/sys/gntalloc.h>
42 #include <xen/sys/gntdev.h>
43 #include <xen/xen-compat.h>
44 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
50 #include <rte_common.h>
51 #include <rte_memory.h>
53 #include <rte_malloc.h>
54 #include <rte_string_fns.h>
56 #include <rte_debug.h>
58 #include "xen_vhost.h"
/* Handle to the xenstore daemon: assigned from xs_daemon_open() during
 * initialisation and handed to xs_read() by xen_read_node(). */
61 static struct xs_handle *xs = NULL;
63 /* gntdev file descriptor to map grant pages */
67 * The grant node format in xenstore for vring/mpool is like:
68 * idx#_rx_vring_gref = "gref1#, gref2#, gref3#"
69 * idx#_mempool_gref = "gref1#, gref2#, gref3#"
70 * each gref# is the grant reference for a shared page.
71 * In each shared page, we store the grant_node_item items.
/* One entry stored in a shared grant page (see the format notes above).
 * The packed attribute removes padding between members.
 * NOTE(review): the members are not visible in this excerpt -- confirm the
 * layout against the full definition. */
73 struct grant_node_item {
76 } __attribute__((packed));
/* Local declaration of the ethernet address parser. xen_parse_etheraddr()
 * calls it with a NULL token and the xenstore value as srcbuf, and treats a
 * negative return as failure. */
78 int cmdline_parse_etheraddr(void *tk, const char *srcbuf,
79 void *res, unsigned ressize);
81 /* Map grant ref refid at addr_ori*/
/*
 * Map one grant reference into this process through the gntdev descriptor.
 *
 * addr_ori: NULL lets mmap choose the address; otherwise the page is placed
 *           exactly at addr_ori (MAP_FIXED).
 * domid, refid: granting domain and grant reference handed to the ioctl.
 * pindex: out parameter; callers later pass the stored value to
 *         xen_unmap_grant_ref(). NOTE(review): the store itself is not
 *         visible in this excerpt -- confirm.
 *
 * Callers treat a NULL return as failure.
 */
83 xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex)
85 struct ioctl_gntdev_map_grant_ref arg;
/* Exactly one page is mapped per call. */
87 int pg_sz = getpagesize();
90 arg.refs[0].domid = domid;
91 arg.refs[0].ref = refid;
/* Register the grant with gntdev first; the mmap below maps the result. */
93 int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
95 RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__,
96 domid, refid, strerror(errno));
/* Without a requested address the kernel picks one; with one, MAP_FIXED
 * forces the page onto addr_ori. */
100 if (addr_ori == NULL)
101 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED,
104 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED,
107 if (addr == MAP_FAILED) {
108 RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__,
109 domid, refid, strerror(errno));
119 /* Unmap one grant ref, and munmap must be called before this */
/*
 * Release one grant mapping, identified by the index obtained when it was
 * mapped, via IOCTL_GNTDEV_UNMAP_GRANT_REF on the gntdev descriptor.
 * Callers treat a non-zero return as failure.
 */
121 xen_unmap_grant_ref(uint64_t index)
123 struct ioctl_gntdev_unmap_grant_ref arg;
128 rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
/* NOTE(review): the format string has no space between the PRIx64
 * conversion and the word unmap, so the index and the text run together
 * in the log output. */
130 RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 "unmap failed\n", __func__, index);
137 * Reserve a virtual address space.
138 * On success, returns the pointer. On failure, returns NULL.
/*
 * Reserve size bytes of address space whose start is aligned to page_sz.
 * The area is obtained as an anonymous read-only mapping that is page_sz
 * larger than requested, then trimmed at both ends.
 */
141 get_xen_virtual(size_t size, size_t page_sz)
144 uintptr_t aligned_addr;
/* Over-allocate by page_sz so an aligned start always fits inside. */
146 addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
147 if (addr == MAP_FAILED) {
148 RTE_LOG(ERR, XENHOST, "failed get a virtual area\n");
152 aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
/* Drop the unaligned head.
 * NOTE(review): when addr is already aligned this length is 0; the munmap
 * result is not checked here. */
153 munmap(addr, aligned_addr - (uintptr_t)addr);
/* Drop the tail beyond aligned_addr + size; head and tail together make up
 * the extra page_sz bytes. */
154 munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr);
155 addr = (void *)(aligned_addr);
/*
 * Counterpart of get_xen_virtual(): map_gntnode() calls it with the reserved
 * area and its size when a later allocation fails. page_sz is marked unused.
 * NOTE(review): the body is not visible in this excerpt -- confirm what it
 * releases.
 */
161 free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused)
168 * Returns val str in xenstore.
170 * Full path string for key
172 * Pointer to Val str, NULL on failure
/*
 * Read the xenstore value stored at path using the global handle xs.
 * len is forwarded to xs_read(). Callers compare the result with NULL to
 * detect a missing node.
 * NOTE(review): per libxenstore, xs_read() returns a malloc-allocated buffer
 * and XBT_NULL means no enclosing transaction -- confirm that callers free
 * the returned buffer.
 */
175 xen_read_node(char *path, uint32_t *len)
179 buf = xs_read(xs, XBT_NULL, path, len);
/*
 * Walk the (gref, pfn) pairs held in gnt->gref_pfn, stopping at the first
 * pair whose gref is not positive or after PAGE_PFNNUM / 2 pairs.
 * map_gntnode() adds the result to its page total, so the return value is
 * presumably the number of valid pairs -- the return statement is not
 * visible here.
 */
184 cal_pagenum(struct xen_gnt *gnt)
188 * the items in the page are in the format of
189 * gref#,pfn#,...,gref#,pfn#
190 * FIXME, 0 is reserved by system, use it as terminator.
/* Entries are interleaved, so pair i has its gref at index i * 2. */
192 for (i = 0; i < (PAGE_PFNNUM) / 2; i++) {
/* NOTE(review): if gref is an unsigned member this test is the same as a
 * comparison with 0 -- confirm the member type. */
193 if (gnt->gref_pfn[i * 2].gref <= 0)
200 /* Frees memory allocated to a grant node */
/*
 * Release the gnt_info array owned by a grant node.
 * NOTE(review): error paths in this file may reach this function with a
 * node pointer that is still NULL; a guard is presumably on a line not
 * visible here -- confirm.
 */
202 xen_free_gntnode(struct xen_gntnode *gntnode)
/* NOTE(review): free(NULL) is a no-op, so this guard is redundant. */
206 if (gntnode->gnt_info)
207 free(gntnode->gnt_info);
212 * Parse a grant node.
216 * Full path string for a grant node, like for the following (key, val) pair
217 * idx#_mempool_gref = "gref#, gref#, gref#"
218 * path = 'local/domain/domid/control/dpdk/idx#_mempool_gref'
219 * gref# refers to a shared page containing packed (gref,pfn) entries
221 * Returns the pointer to xen_gntnode
/*
 * Build a xen_gntnode from the xenstore node at path.
 * The node value is a list of grant references; each one names a page in
 * domain dom_id whose contents are copied into a local xen_gnt entry.
 */
223 static struct xen_gntnode *
224 parse_gntnode(int dom_id, char *path)
226 char **gref_list = NULL;
227 uint32_t i, len, gref_num;
230 struct xen_gntnode *gntnode = NULL;
231 struct xen_gnt *gnt = NULL;
232 int pg_sz = getpagesize();
236 if ((buf = xen_read_node(path, &len)) == NULL)
/* Token pointers produced by rte_strsplit() below. */
239 gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
240 if (gref_list == NULL)
/* NOTE(review): gref_num is unsigned; confirm that the check on the next
 * line copes with an error value returned by rte_strsplit(). */
243 gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
244 XEN_GREF_SPLITTOKEN);
246 RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__);
/* One node header plus one xen_gnt per grant reference in the list. */
250 gntnode = calloc(1, sizeof(struct xen_gntnode));
251 gnt = calloc(gref_num, sizeof(struct xen_gnt));
252 if (gnt == NULL || gntnode == NULL)
255 for (i = 0; i < gref_num; i++) {
/* Base 0 accepts decimal, octal and hex notation.
 * NOTE(review): the errno test below only works if errno was cleared
 * first; presumably that happens on a line not visible here. */
257 gnt[i].gref = strtol(gref_list[i], &end, 0);
258 if (errno != 0 || end == NULL || end == gref_list[i] ||
259 (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) {
260 RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__);
/* Map the page temporarily at a kernel-chosen address. */
263 addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
265 RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref);
268 RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
/* Keep a private copy of the whole page.
 * NOTE(review): assumes gref_pfn holds at least pg_sz bytes -- confirm. */
269 memcpy(gnt[i].gref_pfn, addr, pg_sz);
/* Tear the temporary mapping down: munmap first, then release the grant. */
270 if (munmap(addr, pg_sz)) {
271 RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref);
274 if (xen_unmap_grant_ref(index)) {
275 RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref);
/* Hand the parsed entries over to the node. */
281 gntnode->gnt_num = gref_num;
282 gntnode->gnt_info = gnt;
301 * This function maps grant node of vring or mbuf pool to a continuous virtual address space,
302 * and returns mapped address, pfn array, index array
304 * Pointer to grant node
308 * Pointer to pfn array, caller should free this array
310 * Pointer to number of pages
312 * Pointer to index array, used to release the grefs when this node is freed
314 * Pointer to mapped virtual address, NULL on failure
/*
 * Map every page described by gntnode into one contiguous virtual range.
 * Works in two passes: count the pages, reserve an area of that size, then
 * map each grant reference at its page offset inside the area.
 * NOTE(review): the stores through ppfn, pgs and ppindex and the return
 * statements are not visible in this excerpt -- confirm against the full
 * body.
 */
317 map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
321 size_t total_pages = 0;
325 uint32_t pfn_num = 0;
331 pg_sz = getpagesize();
/* Pass 1: total number of pages over all grant entries. */
332 for (i = 0; i < gntnode->gnt_num; i++) {
333 gnt = gntnode->gnt_info + i;
334 total_pages += cal_pagenum(gnt);
/* Reserve the address range that the fixed mappings below will fill. */
336 if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
337 RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__);
/* One pfn and one gntdev index per mapped page. */
340 pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
341 pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
342 if (pfn == NULL || pindex == NULL) {
343 free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
349 RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
/* Pass 2: map each valid gref at offset pfn_num * pg_sz in the area. */
350 for (i = 0; i < gntnode->gnt_num; i++) {
351 gnt = gntnode->gnt_info + i;
352 for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
/* Same terminator rule as cal_pagenum(). */
353 if ((gnt->gref_pfn[j * 2].gref) <= 0)
355 /*alternative: batch map, or through libxc*/
356 if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
358 gnt->gref_pfn[j * 2].gref,
359 &pindex[pfn_num]) == NULL) {
/* The pfn is the second element of the pair. */
362 pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
/* Failure handling: unmap the pages mapped so far, then release their
 * grant references. */
369 munmap(addr, pfn_num * pg_sz);
370 for (i = 0; i < pfn_num; i++) {
371 xen_unmap_grant_ref(pindex[i]);
/*
 * Read the xenstore node built from the pool's dom_id, pool_idx and
 * XEN_GVA_SUFFIX, and store its value, parsed as hexadecimal, in
 * mempool->gva. parse_mempoolnode() treats a non-zero return as failure.
 */
393 parse_mpool_va(struct xen_mempool *mempool)
395 char path[PATH_MAX] = {0};
402 snprintf(path, sizeof(path),
403 XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX,
404 mempool->dom_id, mempool->pool_idx);
406 if((buf = xen_read_node(path, &len)) == NULL)
/* NOTE(review): the errno test below only works if errno was cleared
 * first; presumably that happens on a line not visible here. */
408 mempool->gva = (void *)strtoul(buf, &end, 16);
/* The whole string must be consumed: anything left before the terminating
 * NUL is rejected. */
409 if (errno != 0 || end == NULL || end == buf || *end != '\0') {
/*
 * Map the pages of a mempool grant node and record the pfn table, the page
 * count and the gntdev index array in the mempool.
 * parse_mempoolnode() treats a non-zero return as failure.
 */
424 map_mempoolnode(struct xen_gntnode *gntnode,
425 struct xen_mempool *mempool)
427 if (gntnode == NULL || mempool == NULL)
/* NOTE(review): the result is presumably assigned to mempool->hva on the
 * preceding line, which is not visible here; the log below prints hva. */
431 map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex);
433 RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva);
/*
 * Undo map_mempoolnode(): unmap the pool pages, release their grant
 * references and free the bookkeeping arrays. The pages are unmapped before
 * the grant references are released, as xen_unmap_grant_ref() requires.
 */
442 cleanup_mempool(struct xen_mempool *mempool)
444 int pg_sz = getpagesize();
/* NOTE(review): any guard on mempool->hva would be on a line not visible
 * here -- confirm. */
448 munmap(mempool->hva, mempool->mempfn_num * pg_sz);
451 if (mempool->pindex) {
452 RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n",
456 mempool->mempfn_num);
/* One gntdev index per mapped page. */
457 for (i = 0; i < mempool->mempfn_num; i ++) {
458 xen_unmap_grant_ref(mempool->pindex[i]);
461 mempool->pindex = NULL;
/* NOTE(review): free(NULL) is a no-op, so this guard is redundant. */
463 if (mempool->mempfn_tbl)
464 free(mempool->mempfn_tbl);
465 mempool->mempfn_tbl = NULL;
469 * process mempool node idx#_mempool_gref, idx = 0, 1, 2...
470 * until we encounter a node that doesn't exist.
/*
 * Discover and map the mempools of a guest. Each existing mempool node is
 * parsed into a grant node, its guest virtual address is read, and its
 * pages are mapped. On failure every pool slot is cleaned up and the array
 * is zeroed again.
 * NOTE(review): the loop construct, the pool_num update and the return
 * statements are not visible in this excerpt.
 */
473 parse_mempoolnode(struct xen_guest *guest)
476 char path[PATH_MAX] = {0};
477 struct xen_gntnode *gntnode = NULL;
478 struct xen_mempool *mempool = NULL;
/* Start from a clean pool table. */
481 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
485 /* check if null terminated */
486 snprintf(path, sizeof(path),
487 XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
491 if ((buf = xen_read_node(path, &len)) != NULL) {
492 /* this node exists */
495 if (guest->pool_num == 0) {
/* NOTE(review): this message uses the PMD log type while the rest of the
 * file logs with XENHOST. */
496 RTE_LOG(ERR, PMD, "no mempool found\n");
/* NOTE(review): no bound check of pool_num against MAX_XENVIRT_MEMPOOL is
 * visible before this index -- confirm. */
502 mempool = &guest->mempool[guest->pool_num];
503 mempool->dom_id = guest->dom_id;
504 mempool->pool_idx = guest->pool_num;
506 RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
507 gntnode = parse_gntnode(guest->dom_id, path);
511 if (parse_mpool_va(mempool))
514 RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
515 if (map_mempoolnode(gntnode, mempool))
/* The grant node is only needed while mapping. */
518 xen_free_gntnode(gntnode);
/* Failure handling: drop the current node and undo every pool slot. */
525 xen_free_gntnode(gntnode);
526 for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) {
527 cleanup_mempool(&guest->mempool[i]);
529 /* reinitialise mempool */
530 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
/*
 * Map the flag page of a vring: read the grant reference from the xenstore
 * node built with XEN_VRINGFLAG_SUFFIX and map it at a kernel-chosen
 * address. parse_vringnode() treats a non-zero return as failure.
 */
535 xen_map_vringflag(struct xen_vring *vring)
537 char path[PATH_MAX] = {0};
540 int pg_sz = getpagesize();
543 snprintf(path, sizeof(path),
544 XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX,
545 vring->dom_id, vring->virtio_idx);
547 if((buf = xen_read_node(path, &len)) == NULL)
/* Base 0 accepts decimal, octal and hex notation. */
551 gref = strtol(buf, &end, 0);
/* Unlike parse_mpool_va(), trailing characters after the number are
 * accepted here. */
552 if (errno != 0 || end == NULL || end == buf) {
/* The 0 address argument lets the kernel choose where to map the page. */
555 vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index);
/* A mapped page whose first value is 0 is rejected as well. */
556 if (vring->flag == NULL || *vring->flag == 0)
/* Failure handling: unmap the page, then release the grant reference.
 * NOTE(review): whether this path is guarded against a NULL flag pointer
 * is not visible here -- confirm. */
565 munmap(vring->flag, pg_sz);
567 xen_unmap_grant_ref(vring->flag_index);
/*
 * Map the rx vring pages described by gntnode and record the address, the
 * pfn table, the page count and the gntdev index array in vring.
 * parse_vringnode() treats a non-zero return as failure.
 */
574 xen_map_rxvringnode(struct xen_gntnode *gntnode,
575 struct xen_vring *vring)
577 vring->rxvring_addr =
578 map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex);
/* Logged before the result is checked, so a failed mapping is printed as a
 * null address. */
579 RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr);
580 if (vring->rxvring_addr)
/*
 * Map the tx vring pages described by gntnode and record the address, the
 * pfn table, the page count and the gntdev index array in vring.
 * parse_vringnode() treats a non-zero return as failure.
 */
587 xen_map_txvringnode(struct xen_gntnode *gntnode,
588 struct xen_vring *vring)
590 vring->txvring_addr =
591 map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex);
/* Logged before the result is checked, so a failed mapping is printed as a
 * null address. */
592 RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr);
593 if (vring->txvring_addr)
/*
 * Release everything a vring holds: the rx and tx page mappings, their
 * grant references and pfn tables, and the flag page. For each ring the
 * pages are unmapped before the grant references are released, as
 * xen_unmap_grant_ref() requires.
 */
600 cleanup_vring(struct xen_vring *vring)
602 int pg_sz = getpagesize();
605 RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx);
/* rx ring: pages first ... */
606 if (vring->rxvring_addr) {
607 munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz);
608 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n",
611 RTE_PTR_ADD(vring->rxvring_addr,
612 vring->rxpfn_num * pg_sz - 1));
614 vring->rxvring_addr = NULL;
/* ... then one grant reference per page ... */
617 if (vring->rx_pindex) {
618 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num);
619 for (i = 0; i < vring->rxpfn_num; i++) {
620 xen_unmap_grant_ref(vring->rx_pindex[i]);
623 vring->rx_pindex = NULL;
/* ... then the pfn table. free(NULL) is a no-op, so the guard is
 * redundant. */
625 if (vring->rxpfn_tbl)
626 free(vring->rxpfn_tbl);
627 vring->rxpfn_tbl = NULL;
/* tx ring: same sequence as rx. */
629 if (vring->txvring_addr) {
630 munmap(vring->txvring_addr, vring->txpfn_num * pg_sz);
631 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n",
634 RTE_PTR_ADD(vring->txvring_addr,
635 vring->txpfn_num * pg_sz - 1));
637 vring->txvring_addr = NULL;
639 if (vring->tx_pindex) {
640 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num);
641 for (i = 0; i < vring->txpfn_num; i++) {
642 xen_unmap_grant_ref(vring->tx_pindex[i]);
645 vring->tx_pindex = NULL;
647 if (vring->txpfn_tbl)
648 free(vring->txpfn_tbl);
649 vring->txpfn_tbl = NULL;
/* Flag page: both calls are logged only when they succeed.
 * NOTE(review): any guard on vring->flag would be on a line not visible
 * here -- confirm. */
652 if (!munmap((void *)vring->flag, pg_sz))
653 RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag);
654 if (!xen_unmap_grant_ref(vring->flag_index))
655 RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index);
/*
 * Read the ethernet address of a virtio device from the xenstore node built
 * with XEN_ADDR_SUFFIX and parse it into vring->addr.
 * parse_vringnode() treats a non-zero return as failure.
 */
664 xen_parse_etheraddr(struct xen_vring *vring)
666 char path[PATH_MAX] = {0};
671 snprintf(path, sizeof(path),
672 XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX,
673 vring->dom_id, vring->virtio_idx);
675 if ((buf = xen_read_node(path, &len)) == NULL)
/* The parser is called without a token; a negative result means the
 * string was rejected. */
678 if (cmdline_parse_etheraddr(NULL, buf, &vring->addr,
679 sizeof(vring->addr)) < 0)
/*
 * Set up guest->vring[virtio_idx]: parse the rx and tx grant nodes, read
 * the ethernet address, map both rings and the flag page. The grant nodes
 * are freed on both the success and the failure path; on failure the vring
 * is also cleaned up and zeroed.
 */
690 parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
692 char path[PATH_MAX] = {0};
693 struct xen_gntnode *rx_gntnode = NULL;
694 struct xen_gntnode *tx_gntnode = NULL;
695 struct xen_vring *vring = NULL;
697 /*check if null terminated */
698 snprintf(path, sizeof(path),
699 XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
703 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
704 rx_gntnode = parse_gntnode(guest->dom_id, path);
705 if (rx_gntnode == NULL)
708 /*check if null terminated */
709 snprintf(path, sizeof(path),
710 XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
714 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
715 tx_gntnode = parse_gntnode(guest->dom_id, path);
716 if (tx_gntnode == NULL)
/* Both nodes parsed: initialise the vring slot from scratch. */
719 vring = &guest->vring[virtio_idx];
720 bzero(vring, sizeof(*vring));
721 vring->dom_id = guest->dom_id;
722 vring->virtio_idx = virtio_idx;
724 if (xen_parse_etheraddr(vring) != 0)
/* NOTE(review): path was last formatted with the tx suffix above; unless
 * it is rewritten on a line not visible here, this rx message prints the
 * tx node path. */
727 RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
728 if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
731 RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
732 if (xen_map_txvringnode(tx_gntnode, vring) != 0)
735 if (xen_map_vringflag(vring) != 0)
/* Success: the grant nodes are only needed while mapping. */
740 xen_free_gntnode(rx_gntnode);
741 xen_free_gntnode(tx_gntnode);
/* Failure handling.
 * NOTE(review): the node pointers and vring are still NULL when an early
 * step fails; the guards are presumably on lines not visible here --
 * confirm. */
747 xen_free_gntnode(rx_gntnode);
749 xen_free_gntnode(tx_gntnode);
751 cleanup_vring(vring);
752 bzero(vring, sizeof(*vring));
758 * Open xen grant dev driver
760 * 0 on success, -1 on failure.
/* Open the grant device read/write and keep the descriptor in d_fd, which
 * the ioctl and mmap calls in this file use.
 * NOTE(review): the function header is not visible in this excerpt. */
765 d_fd = open(XEN_GNTDEV_FNAME, O_RDWR);
/* open() reports failure with -1; map that to -1, anything else to 0. */
767 return d_fd == -1? (-1): (0);
771 * Initialise xenstore handle and open grant dev driver.
773 * 0 on success, -1 on failure.
778 xs = xs_daemon_open();
780 rte_panic("failed initialize xen daemon handler");
783 if (xen_grant_init())