4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <sys/ioctl.h>
41 #include <xen/sys/gntalloc.h>
42 #include <xen/sys/gntdev.h>
43 #include <xen/xen-compat.h>
44 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
50 #include <rte_common.h>
51 #include <rte_memory.h>
52 #include <rte_tailq.h>
54 #include <rte_malloc.h>
55 #include <rte_string_fns.h>
57 #include <rte_debug.h>
59 #include "xen_vhost.h"
/* Xenstore connection handle: assigned from xs_daemon_open() in the
 * initialisation code and handed to xs_read() by xen_read_node(). */
62 static struct xs_handle *xs = NULL;
64 /* gntdev file descriptor to map grant pages */
68 * The grant node format in xenstore for vring/mpool is like:
69 * idx#_rx_vring_gref = "gref1#, gref2#, gref3#"
70 * idx#_mempool_gref = "gref1#, gref2#, gref3#"
71 * each gref# is the grant reference for a shared page.
72 * In each shared page, we store the grant_node_item items.
/* Record type stored in the shared grant pages; declared packed so that the
 * compiler inserts no padding between its members. */
74 struct grant_node_item {
77 } __attribute__((packed));
/* Prototype of the ethernet address parser used by xen_parse_etheraddr(),
 * which passes tk == NULL, the xenstore string as srcbuf and
 * &vring->addr / sizeof(vring->addr) as res / ressize, and tests the
 * result for < 0. */
79 int cmdline_parse_etheraddr(void *tk, const char *srcbuf,
80 void *res, unsigned ressize);
82 /* Map grant ref refid at addr_ori*/
/*
 * Map one granted page into this process through the gntdev descriptor d_fd.
 *
 * addr_ori: NULL to let mmap() pick the address; otherwise the address to
 *           map at (MAP_FIXED is added to the flags in that case).
 * domid:    stored in arg.refs[0].domid for the ioctl.
 * refid:    stored in arg.refs[0].ref for the ioctl.
 * pindex:   NOTE(review): callers pass the address of their index slot, so
 *           this presumably receives the gntdev index of the mapping --
 *           confirm against the full function body.
 *
 * Callers in this file test the returned pointer against NULL.
 */
84 xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex)
86 struct ioctl_gntdev_map_grant_ref arg;
88 int pg_sz = getpagesize();
/* Describe the single (domain, grant reference) pair to be mapped. */
91 arg.refs[0].domid = domid;
92 arg.refs[0].ref = refid;
/* Issue IOCTL_GNTDEV_MAP_GRANT_REF on d_fd; the log below reports the
 * errno text for the ioctl failure case. */
94 int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
96 RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__,
97 domid, refid, strerror(errno));
/* Exactly one page is mapped, readable, writable and shared. The two mmap()
 * calls differ only in MAP_FIXED, which is requested when the caller
 * supplied an address. */
101 if (addr_ori == NULL)
102 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED,
105 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED,
/* mmap() reports failure with MAP_FAILED, not NULL. */
108 if (addr == MAP_FAILED) {
109 RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__,
110 domid, refid, strerror(errno));
120 /* Unmap one grant ref, and munmap must be called before this */
/*
 * Release the gntdev mapping identified by index by issuing
 * IOCTL_GNTDEV_UNMAP_GRANT_REF on d_fd.
 *
 * Callers in this file treat a non-zero return as failure (parse_gntnode
 * logs a release failure on non-zero, cleanup_vring logs a release on zero).
 */
122 xen_unmap_grant_ref(uint64_t index)
124 struct ioctl_gntdev_unmap_grant_ref arg;
129 rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
/* NOTE(review): the format string has no space between the PRIx64
 * conversion and the word unmap, so the index and the text run together in
 * the log output. */
131 RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 "unmap failed\n", __func__, index);
138 * Reserve a virtual address space.
139 * On success, returns the pointer. On failure, returns NULL.
/*
 * Reserve size bytes of virtual address space whose start is aligned to
 * page_sz.
 *
 * The function over-allocates by page_sz with an anonymous, read-only,
 * shared mapping, rounds the start address up to a page_sz boundary and
 * gives the unused head and tail of the mapping back with munmap().
 * map_gntnode() tests the result against NULL.
 */
142 get_xen_virtual(size_t size, size_t page_sz)
145 uintptr_t aligned_addr;
/* Extra page_sz bytes leave room to move the start up to the boundary. */
147 addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
148 if (addr == MAP_FAILED) {
149 RTE_LOG(ERR, XENHOST, "failed get a virtual area\n");
153 aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
/* Trim the head [addr, aligned_addr) and the tail that follows
 * aligned_addr + size; together they add up to page_sz bytes.
 * NOTE(review): when addr is already aligned the first call has length 0,
 * and neither munmap() return value is checked. */
154 munmap(addr, aligned_addr - (uintptr_t)addr);
155 munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr);
156 addr = (void *)(aligned_addr);
/* Presumably the release counterpart of get_xen_virtual() -- confirm against
 * the body. page_sz is marked __rte_unused. map_gntnode() calls this with
 * (addr, total_pages * pg_sz, pg_sz) when its array allocations fail. */
162 free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused)
169 * Returns val str in xenstore.
171 * Full path string for key
173 * Pointer to Val str, NULL on failure
/*
 * Read the xenstore value stored under path.
 *
 * path: full xenstore key.
 * len:  passed straight to xs_read(), which stores the value length there.
 *
 * Callers in this file test the result against NULL.
 * NOTE(review): per the xenstore API the buffer returned by xs_read() is
 * heap allocated and owned by the caller -- confirm every call site frees it.
 */
176 xen_read_node(char *path, uint32_t *len)
/* XBT_NULL: the read is not part of a xenstore transaction. */
180 buf = xs_read(xs, XBT_NULL, path, len);
/*
 * Examine the (gref, pfn) pairs held in one grant page. map_gntnode() adds
 * the return value to its running total of pages.
 *
 * gnt: grant page copy whose gref_pfn array is scanned.
 */
185 cal_pagenum(struct xen_gnt *gnt)
189 * the items in the page are in the format of
190 * gref#,pfn#,...,gref#,pfn#
191 * FIXME, 0 is reserved by system, use it as terminator.
/* Entries are laid out in pairs, so pair i has its gref at index i * 2 and
 * at most PAGE_PFNNUM / 2 pairs fit. The test below looks for the
 * terminating gref.
 * NOTE(review): if gref is an unsigned field, <= 0 is the same as == 0 --
 * confirm the field type in the header. */
193 for (i = 0; i < (PAGE_PFNNUM) / 2; i++) {
194 if (gnt->gref_pfn[i * 2].gref <= 0)
201 /* Frees memory allocated to a grant node */
/*
 * Release the heap memory owned by a grant node.
 *
 * NOTE(review): parse_mempoolnode() may reach this function while its
 * gntnode pointer is still NULL -- confirm that a NULL check precedes the
 * dereference below.
 */
203 xen_free_gntnode(struct xen_gntnode *gntnode)
/* The guard is not required for correctness: free(NULL) is a no-op. */
207 if (gntnode->gnt_info)
208 free(gntnode->gnt_info);
213 * Parse a grant node.
217 * Full path string for a grant node, like for the following (key, val) pair
218 * idx#_mempool_gref = "gref#, gref#, gref#"
219 * path = 'local/domain/domid/control/dpdk/idx#_mempool_gref'
220 * gref# is a shared page containing packed (gref,pfn) entries
222 * Returns the pointer to xen_gntnode
/*
 * Build a xen_gntnode from the xenstore value stored at path.
 *
 * The value is split on XEN_GREF_SPLITTOKEN into at most MAX_GREF_PER_NODE
 * gref strings. Each gref is parsed, its page is mapped from domain dom_id,
 * one page of data is copied into gnt[i].gref_pfn, and the temporary mapping
 * and the grant reference are dropped again.
 *
 * On the success path gntnode->gnt_num holds the number of grefs and
 * gntnode->gnt_info points at the calloc()ed xen_gnt array. Callers in this
 * file test the result against NULL.
 */
224 static struct xen_gntnode *
225 parse_gntnode(int dom_id, char *path)
227 char **gref_list = NULL;
228 uint32_t i, len, gref_num;
231 struct xen_gntnode *gntnode = NULL;
232 struct xen_gnt *gnt = NULL;
233 int pg_sz = getpagesize();
/* Fetch the raw comma separated gref list from xenstore. */
237 if ((buf = xen_read_node(path, &len)) == NULL)
/* Scratch array that receives one pointer per gref token. */
240 gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
241 if (gref_list == NULL)
244 gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
245 XEN_GREF_SPLITTOKEN);
247 RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__);
/* One node header plus one xen_gnt per gref; both results are checked
 * together below. */
251 gntnode = calloc(1, sizeof(struct xen_gntnode));
252 gnt = calloc(gref_num, sizeof(struct xen_gnt));
253 if (gnt == NULL || gntnode == NULL)
256 for (i = 0; i < gref_num; i++) {
/* Base 0 lets strtol() accept decimal, octal and hexadecimal text. The
 * token must contain at least one digit and may only be followed by the
 * end of the string or the split token.
 * NOTE(review): the check relies on errno -- confirm it is cleared before
 * the strtol() call. */
258 gnt[i].gref = strtol(gref_list[i], &end, 0);
259 if (errno != 0 || end == NULL || end == gref_list[i] ||
260 (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) {
261 RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__);
/* Temporary mapping at an address chosen by mmap(); index identifies it
 * for the release further down. */
264 addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
266 RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref);
269 RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
/* Keep a private copy of the whole page.
 * NOTE(review): assumes gref_pfn is at least pg_sz bytes large -- confirm
 * against the struct xen_gnt definition. */
270 memcpy(gnt[i].gref_pfn, addr, pg_sz);
/* Drop the mapping first and the grant reference second; failures of
 * either step are logged at INFO level. */
271 if (munmap(addr, pg_sz)) {
272 RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref);
275 if (xen_unmap_grant_ref(index)) {
276 RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref);
/* Publish the parsed array through the node. */
282 gntnode->gnt_num = gref_num;
283 gntnode->gnt_info = gnt;
302 * This function maps the grant node of a vring or mbuf pool to a contiguous virtual address space,
303 * and returns mapped address, pfn array, index array
305 * Pointer to grant node
309 * Pointer to pfn array, caller should free this array
311 * Pointer to number of pages
313 * Pointer to index array, used to release the grefs when this node is freed
315 * Pointer to mapped virtual address, NULL on failure
/*
 * Map every page referenced by gntnode into one contiguous virtual range.
 *
 * gntnode: grant node whose gnt_info entries hold (gref, pfn) pairs.
 * domid:   presumably the domain forwarded to xen_grant_mmap() -- confirm.
 * ppfn, pgs, ppindex: callers pass the addresses of their pfn table, page
 *          count and index array; presumably filled in on success -- confirm.
 *
 * The pfn and pindex arrays are calloc()ed with total_pages entries each.
 * Callers in this file test the returned address against NULL.
 */
318 map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
322 size_t total_pages = 0;
326 uint32_t pfn_num = 0;
332 pg_sz = getpagesize();
/* First pass: add up the page count of every grant page in the node. */
333 for (i = 0; i < gntnode->gnt_num; i++) {
334 gnt = gntnode->gnt_info + i;
335 total_pages += cal_pagenum(gnt);
/* Reserve one range, aligned to pg_sz, that is large enough for all pages. */
337 if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
338 RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__);
/* One pfn slot and one gntdev index slot per page. */
341 pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
342 pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
343 if (pfn == NULL || pindex == NULL) {
/* Allocation failed: hand the reserved range back. */
344 free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
350 RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
/* Second pass: walk the pairs of every grant page again. pfn_num counts the
 * pages handled so far and selects both the page offset inside the reserved
 * range and the slot in pfn and pindex. */
351 for (i = 0; i < gntnode->gnt_num; i++) {
352 gnt = gntnode->gnt_info + i;
353 for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
/* Same terminator test as in cal_pagenum(). */
354 if ((gnt->gref_pfn[j * 2].gref) <= 0)
356 /*alternative: batch map, or through libxc*/
357 if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
359 gnt->gref_pfn[j * 2].gref,
360 &pindex[pfn_num]) == NULL) {
/* Record the pfn stored next to the gref of this pair. */
363 pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
/* Unwind: unmap the pfn_num pages handled so far, then release their grant
 * references, in that order. */
370 munmap(addr, pfn_num * pg_sz);
371 for (i = 0; i < pfn_num; i++) {
372 xen_unmap_grant_ref(pindex[i]);
/*
 * Read the address of a mempool from xenstore and store it in mempool->gva.
 *
 * The key is built from XEN_VM_ROOTNODE_FMT, mempool->dom_id,
 * mempool->pool_idx and XEN_GVA_SUFFIX. parse_mempoolnode() tests the return
 * value for non-zero.
 */
394 parse_mpool_va(struct xen_mempool *mempool)
396 char path[PATH_MAX] = {0};
403 snprintf(path, sizeof(path),
404 XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX,
405 mempool->dom_id, mempool->pool_idx);
407 if((buf = xen_read_node(path, &len)) == NULL)
/* The value is parsed as hexadecimal and must be consumed completely: at
 * least one digit and nothing after the number.
 * NOTE(review): the check relies on errno -- confirm it is cleared before
 * the strtoul() call. */
409 mempool->gva = (void *)strtoul(buf, &end, 16);
410 if (errno != 0 || end == NULL || end == buf || *end != '\0') {
/*
 * Map the pages of a mempool grant node through map_gntnode(), using the
 * pfn table, page count and index array fields of mempool as outputs.
 *
 * Both arguments are checked for NULL first. parse_mempoolnode() tests the
 * return value for non-zero.
 * NOTE(review): the map_gntnode() result is presumably stored in
 * mempool->hva, which is the value logged below -- confirm.
 */
425 map_mempoolnode(struct xen_gntnode *gntnode,
426 struct xen_mempool *mempool)
428 if (gntnode == NULL || mempool == NULL)
432 map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex);
434 RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva);
/*
 * Release everything a mapped mempool holds: the mapping at mempool->hva,
 * the grant references recorded in mempool->pindex and the pfn table. The
 * pindex and mempfn_tbl pointers are reset to NULL so the structure can be
 * cleaned again safely.
 */
443 cleanup_mempool(struct xen_mempool *mempool)
445 int pg_sz = getpagesize();
/* The mapping spans mempfn_num pages. */
449 munmap(mempool->hva, mempool->mempfn_num * pg_sz);
/* Release one grant reference per mapped page, after the munmap() above. */
452 if (mempool->pindex) {
453 RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n",
457 mempool->mempfn_num);
458 for (i = 0; i < mempool->mempfn_num; i ++) {
459 xen_unmap_grant_ref(mempool->pindex[i]);
462 mempool->pindex = NULL;
/* The guard is not required for correctness: free(NULL) is a no-op. */
464 if (mempool->mempfn_tbl)
465 free(mempool->mempfn_tbl);
466 mempool->mempfn_tbl = NULL;
470 * process mempool node idx#_mempool_gref, idx = 0, 1, 2...
471 * until we encounter a node that doesn't exist.
/*
 * Discover and map the mempools of a guest.
 *
 * The mempool array of the guest is zeroed first. For each pool the
 * xenstore key with XEN_MEMPOOL_SUFFIX is probed, the slot
 * guest->mempool[guest->pool_num] is filled in, the grant node is parsed,
 * the pool address is read and the pages are mapped. The closing statements
 * release the grant node, clean every mempool slot and zero the array again.
 */
474 parse_mempoolnode(struct xen_guest *guest)
477 char path[PATH_MAX] = {0};
478 struct xen_gntnode *gntnode = NULL;
479 struct xen_mempool *mempool = NULL;
/* Start from a clean table. */
482 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
486 /* check if null terminated */
487 snprintf(path, sizeof(path),
488 XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
/* The read is only used to find out whether the key exists. */
492 if ((buf = xen_read_node(path, &len)) != NULL) {
493 /* this node exists */
/* Having recorded no pool at all is reported as an error.
 * NOTE(review): this is the only log call in the file that uses log type
 * PMD instead of XENHOST. */
496 if (guest->pool_num == 0) {
497 RTE_LOG(ERR, PMD, "no mempool found\n");
/* Fill in the identity of the slot that is about to be mapped. */
503 mempool = &guest->mempool[guest->pool_num];
504 mempool->dom_id = guest->dom_id;
505 mempool->pool_idx = guest->pool_num;
507 RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
508 gntnode = parse_gntnode(guest->dom_id, path);
512 if (parse_mpool_va(mempool))
515 RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
516 if (map_mempoolnode(gntnode, mempool))
/* The grant node is only needed while mapping. */
519 xen_free_gntnode(gntnode);
/* Cleanup statements: release the node, then undo every slot. cleanup is
 * applied to all MAX_XENVIRT_MEMPOOL slots, including unused ones. */
526 xen_free_gntnode(gntnode);
527 for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) {
528 cleanup_mempool(&guest->mempool[i]);
530 /* reinitialise mempool */
531 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
/*
 * Map the flag page of a vring.
 *
 * The grant reference is read from the xenstore key built with
 * XEN_VRINGFLAG_SUFFIX, vring->dom_id and vring->virtio_idx. The mapped
 * address goes to vring->flag and the gntdev index to vring->flag_index.
 * parse_vringnode() compares the return value with 0.
 */
536 xen_map_vringflag(struct xen_vring *vring)
538 char path[PATH_MAX] = {0};
541 int pg_sz = getpagesize();
544 snprintf(path, sizeof(path),
545 XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX,
546 vring->dom_id, vring->virtio_idx);
548 if((buf = xen_read_node(path, &len)) == NULL)
/* Base 0 lets strtol() accept decimal, octal and hexadecimal text. Unlike
 * parse_mpool_va(), trailing characters after the number are accepted.
 * NOTE(review): the check relies on errno -- confirm it is cleared before
 * the strtol() call. */
552 gref = strtol(buf, &end, 0);
553 if (errno != 0 || end == NULL || end == buf) {
/* Passing 0 as the address lets mmap() choose where the page is placed. */
556 vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index);
/* A failed mapping and a flag value of 0 are handled by the same branch. */
557 if (vring->flag == NULL || *vring->flag == 0)
/* Undo the mapping: munmap() first, then release the grant reference. */
566 munmap(vring->flag, pg_sz);
568 xen_unmap_grant_ref(vring->flag_index);
/*
 * Map the RX vring pages of gntnode and store the address in
 * vring->rxvring_addr. The pfn table, page count and index array go to
 * vring->rxpfn_tbl, vring->rxpfn_num and vring->rx_pindex.
 *
 * The result depends on whether the mapped address is non-NULL;
 * parse_vringnode() compares the return value with 0.
 */
575 xen_map_rxvringnode(struct xen_gntnode *gntnode,
576 struct xen_vring *vring)
578 vring->rxvring_addr =
579 map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex);
/* Logged before the result is checked, so a failed mapping prints a null
 * address here. */
580 RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr);
581 if (vring->rxvring_addr)
/*
 * Map the TX vring pages of gntnode and store the address in
 * vring->txvring_addr. The pfn table, page count and index array go to
 * vring->txpfn_tbl, vring->txpfn_num and vring->tx_pindex.
 *
 * The result depends on whether the mapped address is non-NULL;
 * parse_vringnode() compares the return value with 0.
 */
588 xen_map_txvringnode(struct xen_gntnode *gntnode,
589 struct xen_vring *vring)
591 vring->txvring_addr =
592 map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex);
/* Logged before the result is checked, so a failed mapping prints a null
 * address here. */
593 RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr);
594 if (vring->txvring_addr)
/*
 * Release everything a vring holds: the RX and TX mappings, their grant
 * references and pfn tables, and finally the flag page with its grant
 * reference. Pointers are reset to NULL after they are released. In each
 * group munmap() comes before the grant references are released.
 */
601 cleanup_vring(struct xen_vring *vring)
603 int pg_sz = getpagesize();
606 RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx);
/* RX ring: the mapping spans rxpfn_num pages. */
607 if (vring->rxvring_addr) {
608 munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz);
609 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n",
612 RTE_PTR_ADD(vring->rxvring_addr,
613 vring->rxpfn_num * pg_sz - 1));
615 vring->rxvring_addr = NULL;
/* RX ring: one grant reference per mapped page. */
618 if (vring->rx_pindex) {
619 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num);
620 for (i = 0; i < vring->rxpfn_num; i++) {
621 xen_unmap_grant_ref(vring->rx_pindex[i]);
624 vring->rx_pindex = NULL;
/* The guard is not required for correctness: free(NULL) is a no-op. */
626 if (vring->rxpfn_tbl)
627 free(vring->rxpfn_tbl);
628 vring->rxpfn_tbl = NULL;
/* TX ring: same sequence as for RX. */
630 if (vring->txvring_addr) {
631 munmap(vring->txvring_addr, vring->txpfn_num * pg_sz);
632 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n",
635 RTE_PTR_ADD(vring->txvring_addr,
636 vring->txpfn_num * pg_sz - 1));
638 vring->txvring_addr = NULL;
640 if (vring->tx_pindex) {
641 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num);
642 for (i = 0; i < vring->txpfn_num; i++) {
643 xen_unmap_grant_ref(vring->tx_pindex[i]);
646 vring->tx_pindex = NULL;
648 if (vring->txpfn_tbl)
649 free(vring->txpfn_tbl);
650 vring->txpfn_tbl = NULL;
/* Flag page: both calls return 0 on success, so the messages below are only
 * logged when the respective step worked. */
653 if (!munmap((void *)vring->flag, pg_sz))
654 RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag);
655 if (!xen_unmap_grant_ref(vring->flag_index))
656 RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index);
/*
 * Read the ethernet address of a vring from xenstore and parse it into
 * vring->addr.
 *
 * The key is built from XEN_VM_ROOTNODE_FMT, vring->dom_id,
 * vring->virtio_idx and XEN_ADDR_SUFFIX. parse_vringnode() compares the
 * return value with 0.
 */
665 xen_parse_etheraddr(struct xen_vring *vring)
667 char path[PATH_MAX] = {0};
672 snprintf(path, sizeof(path),
673 XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX,
674 vring->dom_id, vring->virtio_idx);
676 if ((buf = xen_read_node(path, &len)) == NULL)
/* The parser is called without a token (first argument NULL); a negative
 * result is tested for here. */
679 if (cmdline_parse_etheraddr(NULL, buf, &vring->addr,
680 sizeof(vring->addr)) < 0)
/*
 * Set up guest->vring[virtio_idx].
 *
 * The RX and TX grant nodes are parsed from their xenstore keys, the vring
 * slot is zeroed and tagged with the domain id and index, the ethernet
 * address is read, and the RX ring, the TX ring and the flag page are
 * mapped. The grant nodes are released afterwards. The closing statements
 * release the grant nodes, clean the vring and zero it again.
 */
691 parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
693 char path[PATH_MAX] = {0};
694 struct xen_gntnode *rx_gntnode = NULL;
695 struct xen_gntnode *tx_gntnode = NULL;
696 struct xen_vring *vring = NULL;
698 /*check if null terminated */
699 snprintf(path, sizeof(path),
700 XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
704 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
705 rx_gntnode = parse_gntnode(guest->dom_id, path);
706 if (rx_gntnode == NULL)
/* path is reused for the TX key from here on. */
709 /*check if null terminated */
710 snprintf(path, sizeof(path),
711 XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
715 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
716 tx_gntnode = parse_gntnode(guest->dom_id, path);
717 if (tx_gntnode == NULL)
/* Both nodes parsed: start from a zeroed slot so a later cleanup only sees
 * fields that were really set. */
720 vring = &guest->vring[virtio_idx];
721 bzero(vring, sizeof(*vring));
722 vring->dom_id = guest->dom_id;
723 vring->virtio_idx = virtio_idx;
725 if (xen_parse_etheraddr(vring) != 0)
/* NOTE(review): path holds the TX key at this point, so the rx message
 * below prints the TX path. */
728 RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
729 if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
732 RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
733 if (xen_map_txvringnode(tx_gntnode, vring) != 0)
736 if (xen_map_vringflag(vring) != 0)
/* The grant nodes are only needed while mapping. */
741 xen_free_gntnode(rx_gntnode);
742 xen_free_gntnode(tx_gntnode);
/* Cleanup statements.
 * NOTE(review): vring stays NULL until both grant nodes were parsed --
 * confirm these statements are guarded against a NULL vring and NULL grant
 * nodes. */
748 xen_free_gntnode(rx_gntnode);
750 xen_free_gntnode(tx_gntnode);
752 cleanup_vring(vring);
753 bzero(vring, sizeof(*vring));
759 * Open xen grant dev driver
761 * 0 on success, -1 on failure.
/* Open the grant device read/write and keep the descriptor in d_fd, which
 * the map and unmap helpers use for their ioctl() and mmap() calls. */
766 d_fd = open(XEN_GNTDEV_FNAME, O_RDWR);
/* open() returns -1 on failure; that is passed on as -1, anything else
 * becomes 0. */
768 return d_fd == -1? (-1): (0);
772 * Initialise xenstore handle and open grant dev driver.
774 * 0 on success, -1 on failure.
779 xs = xs_daemon_open();
781 rte_panic("failed initialize xen daemon handler");
784 if (xen_grant_init())