4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <sys/ioctl.h>
41 #include <xen/sys/gntalloc.h>
42 #include <xen/sys/gntdev.h>
43 #include <xen/xen-compat.h>
44 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
50 #include <rte_common.h>
51 #include <rte_memory.h>
53 #include <rte_malloc.h>
54 #include <rte_string_fns.h>
56 #include <rte_debug.h>
58 #include "xen_vhost.h"
/* Xenstore connection handle: assigned from xs_daemon_open() and passed to xs_read() elsewhere in this file. */
61 static struct xs_handle *xs = NULL;
63 /* gntdev file descriptor to map grant pages */
67 * The grant node format in xenstore for vring/mpool is like:
68 * idx#_rx_vring_gref = "gref1#, gref2#, gref3#"
69 * idx#_mempool_gref = "gref1#, gref2#, gref3#"
70 * each gref# is the grant reference for a shared page.
71 * In each shared page, we store the grant_node_item items.
/*
 * Packed record type for the items stored in each shared grant page.
 * NOTE(review): the member declarations are not visible in this listing;
 * other comments in this file describe the page content as (gref, pfn)
 * pairs -- confirm against the full source.
 */
73 struct grant_node_item {
76 } __attribute__((packed));
/*
 * Prototype of the Ethernet-address parser called by xen_parse_etheraddr().
 * That caller passes NULL for tk, the xenstore value for srcbuf, and the
 * destination object with its size for res / ressize; a negative return is
 * treated as failure.
 */
78 int cmdline_parse_etheraddr(void *tk, const char *srcbuf,
79 void *res, unsigned ressize);
81 /* Map grant ref refid at addr_ori*/
/*
 * Map one granted page of a guest domain into this process.
 *
 * addr_ori: requested mapping address; NULL lets mmap() pick one, any other
 *           value is mapped at exactly that address (MAP_FIXED).
 * domid:    domain that issued the grant.
 * refid:    grant reference to map.
 * pindex:   out-parameter. NOTE(review): presumably receives the gntdev
 *           index that callers later hand to xen_unmap_grant_ref(); the
 *           store itself is not visible in this listing -- confirm.
 *
 * Callers in this file treat a NULL return as failure.
 */
83 xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex)
85 struct ioctl_gntdev_map_grant_ref arg;
87 int pg_sz = getpagesize();
/* Describe the grant in slot 0 of the ioctl request. */
90 arg.refs[0].domid = domid;
91 arg.refs[0].ref = refid;
/* Ask the gntdev driver (file descriptor d_fd) to set up the grant mapping. */
93 int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
/* The ioctl failure is logged together with the errno text. */
95 RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__,
96 domid, refid, strerror(errno));
/* A NULL hint leaves the address choice to the kernel; otherwise MAP_FIXED forces addr_ori. */
100 if (addr_ori == NULL)
101 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED,
104 addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED,
/* mmap() reports failure with MAP_FAILED rather than NULL. */
107 if (addr == MAP_FAILED) {
108 RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__,
109 domid, refid, strerror(errno));
119 /* Unmap one grant ref, and munmap must be called before this */
/*
 * Release one grant mapping through the gntdev driver.
 *
 * index: mapping index to release (callers pass the value obtained through
 *        the pindex argument of xen_grant_mmap()).
 *
 * Callers in this file treat a zero return as success and non-zero as failure.
 */
121 xen_unmap_grant_ref(uint64_t index)
123 struct ioctl_gntdev_unmap_grant_ref arg;
128 rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
/* NOTE(review): the format string has no space between the hex index and the word unmap, so the two run together in the log output. */
130 RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 "unmap failed\n", __func__, index);
137 * Reserve a virtual address space.
138 * On success, returns the pointer. On failure, returns NULL.
/*
 * Reserve size bytes of address space starting on a page_sz boundary.
 * The request is over-sized by one page_sz so that an aligned start exists
 * inside it; the unaligned head and the unused tail are unmapped again.
 */
141 get_xen_virtual(size_t size, size_t page_sz)
144 uintptr_t aligned_addr;
/* Placeholder mapping only: read-only, anonymous, no backing file. */
146 addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
147 if (addr == MAP_FAILED) {
148 RTE_LOG(ERR, XENHOST, "failed get a virtual area\n");
/* Round the start up to the next page_sz boundary. */
152 aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
/* Trim the head. NOTE(review): the length is 0 when addr is already aligned, and the munmap() result is not checked. */
153 munmap(addr, aligned_addr - (uintptr_t)addr);
/* Trim everything past aligned_addr + size; head and tail together always total page_sz. */
154 munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr);
155 addr = (void *)(aligned_addr);
/*
 * Counterpart of get_xen_virtual(): map_gntnode() calls it with the reserved
 * address and the reserved byte count when its bookkeeping allocation fails.
 * page_sz is accepted but marked unused.
 * NOTE(review): the body is not visible in this listing; presumably it
 * unmaps [addr, addr + size) -- confirm.
 */
161 free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused)
168 * Returns val str in xenstore.
170 * Full path string for key
172 * Pointer to Val str, NULL on failure
/*
 * Thin wrapper around xs_read() on the file-level xenstore handle.
 * path: full xenstore key; len: forwarded to xs_read() unchanged.
 * NOTE(review): per the libxenstore API the returned buffer is presumably
 * heap-allocated and owned by the caller -- confirm every caller frees it.
 */
175 xen_read_node(char *path, uint32_t *len)
/* XBT_NULL: the read is issued outside any xenstore transaction. */
179 buf = xs_read(xs, XBT_NULL, path, len);
/*
 * Scan the (gref, pfn) pairs copied from one shared page and find the first
 * pair whose gref is the terminator value.
 * NOTE(review): the return statement is not visible in this listing;
 * map_gntnode() adds the result to its page total, so it is presumably the
 * number of valid pairs -- confirm.
 */
184 cal_pagenum(struct xen_gnt *gnt)
188 * the items in the page are in the format of
189 * gref#,pfn#,...,gref#,pfn#
190 * FIXME, 0 is reserved by system, use it as terminator.
/* Each pair occupies two array slots, hence PAGE_PFNNUM / 2 iterations and the i * 2 index. */
192 for (i = 0; i < (PAGE_PFNNUM) / 2; i++) {
193 if (gnt->gref_pfn[i * 2].gref <= 0)
200 /* Frees memory allocated to a grant node */
/*
 * Release a grant node produced by parse_gntnode().
 * NOTE(review): only the release of gnt_info is visible in this listing;
 * whether gntnode itself is freed and whether NULL is tolerated cannot be
 * confirmed from here.
 */
202 xen_free_gntnode(struct xen_gntnode *gntnode)
206 free(gntnode->gnt_info);
211 * Parse a grant node.
215 * Full path string for a grant node, like for the following (key, val) pair
216 * idx#_mempool_gref = "gref#, gref#, gref#"
217 * path = 'local/domain/domid/control/dpdk/idx#_mempool_gref'
218 * gref# is a shared page containing packed (gref,pfn) entries
220 * Returns the pointer to xen_gntnode
/*
 * Read the xenstore value at path, split it into grant references, and copy
 * the content of every referenced shared page into a newly allocated
 * xen_gnt entry. Each page stays mapped only long enough to be copied.
 */
222 static struct xen_gntnode *
223 parse_gntnode(int dom_id, char *path)
225 char **gref_list = NULL;
226 uint32_t i, len, gref_num;
229 struct xen_gntnode *gntnode = NULL;
230 struct xen_gnt *gnt = NULL;
231 int pg_sz = getpagesize();
/* Fetch the raw node value; len is later handed to rte_strsplit() as the string length. */
235 if ((buf = xen_read_node(path, &len)) == NULL)
/* Scratch array of token pointers for rte_strsplit(), capped at MAX_GREF_PER_NODE entries. */
238 gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
239 if (gref_list == NULL)
242 gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
243 XEN_GREF_SPLITTOKEN);
245 RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__);
/* One node descriptor plus one zero-initialised xen_gnt per grant reference. */
249 gntnode = calloc(1, sizeof(struct xen_gntnode));
250 gnt = calloc(gref_num, sizeof(struct xen_gnt));
251 if (gnt == NULL || gntnode == NULL)
254 for (i = 0; i < gref_num; i++) {
/*
 * Convert the token (base auto-detected); reject errno failures, empty
 * tokens and trailing characters other than the separator.
 * NOTE(review): strtol() yields a signed long while gref is logged with %u
 * below -- confirm the field type and consider strtoul().
 */
256 gnt[i].gref = strtol(gref_list[i], &end, 0);
257 if (errno != 0 || end == NULL || end == gref_list[i] ||
258 (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) {
259 RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__);
/* Temporarily map the shared page at a kernel-chosen address. */
262 addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
264 RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref);
267 RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
/* Snapshot the whole page. NOTE(review): assumes gref_pfn spans at least pg_sz bytes -- confirm against struct xen_gnt. */
268 memcpy(gnt[i].gref_pfn, addr, pg_sz);
/* Drop the temporary mapping first, then release the grant: munmap must precede the gntdev unmap. */
269 if (munmap(addr, pg_sz)) {
270 RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref);
273 if (xen_unmap_grant_ref(index)) {
274 RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref);
/* Publish the parsed entries in the node descriptor. */
280 gntnode->gnt_num = gref_num;
281 gntnode->gnt_info = gnt;
296 * This function maps the grant node of a vring or mbuf pool to a contiguous virtual address space,
297 * and returns mapped address, pfn array, index array
299 * Pointer to grant node
303 * Pointer to pfn array, caller should free this array
305 * Pointer to number of pages
307 * Pointer to index array, used to release the grefs when this node is freed
309 * Pointer to mapped virtual address, NULL on failure
/*
 * Map every page listed in a parsed grant node back to back into one
 * reserved virtual range, recording the pfn and the gntdev index of each
 * mapped page in two parallel arrays.
 * NOTE(review): the stores through ppfn, pgs and ppindex and the return
 * statements are not visible in this listing.
 */
312 map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
316 size_t total_pages = 0;
320 uint32_t pfn_num = 0;
326 pg_sz = getpagesize();
/* First pass: add up the per-grant page counts to size the reservation. */
327 for (i = 0; i < gntnode->gnt_num; i++) {
328 gnt = gntnode->gnt_info + i;
329 total_pages += cal_pagenum(gnt);
/* Reserve one page-aligned range large enough for all pages. */
331 if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
332 RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__);
/* Bookkeeping arrays with one slot per page: pfn values and gntdev indexes. */
335 pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
336 pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
337 if (pfn == NULL || pindex == NULL) {
/* Nothing has been mapped yet, so the whole reservation is given back. */
338 free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
344 RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
/* Second pass: map each (gref, pfn) pair at the next page slot of the reservation. */
345 for (i = 0; i < gntnode->gnt_num; i++) {
346 gnt = gntnode->gnt_info + i;
347 for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
/* Same terminator test as cal_pagenum(): a gref that is not positive ends the list. */
348 if ((gnt->gref_pfn[j * 2].gref) <= 0)
350 /*alternative: batch map, or through libxc*/
/* The non-NULL address makes xen_grant_mmap() use MAP_FIXED, replacing the placeholder page. */
351 if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
353 gnt->gref_pfn[j * 2].gref,
354 &pindex[pfn_num]) == NULL) {
/* Slot j * 2 + 1 holds the pfn that belongs to the gref in slot j * 2. */
357 pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
/*
 * Teardown (presumably the failure path; its label is not visible in this
 * listing): unmap the pages mapped so far, then release their grants.
 * NOTE(review): only pfn_num * pg_sz bytes are unmapped while the
 * reservation spans total_pages * pg_sz, so the remainder looks like it
 * stays mapped -- confirm.
 */
364 munmap(addr, pfn_num * pg_sz);
365 for (i = 0; i < pfn_num; i++) {
366 xen_unmap_grant_ref(pindex[i]);
/*
 * Read the xenstore node named after the domain id and pool index of the
 * mempool (XEN_GVA_SUFFIX) and store its hexadecimal value in mempool->gva.
 */
388 parse_mpool_va(struct xen_mempool *mempool)
390 char path[PATH_MAX] = {0};
/* Build the key from the domain id and the pool index. */
397 snprintf(path, sizeof(path),
398 XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX,
399 mempool->dom_id, mempool->pool_idx);
401 if((buf = xen_read_node(path, &len)) == NULL)
/* The value is parsed as base 16; anything left over after the number is rejected below. */
403 mempool->gva = (void *)strtoul(buf, &end, 16);
404 if (errno != 0 || end == NULL || end == buf || *end != '\0') {
/*
 * Map the pages of a mempool grant node via map_gntnode(), which is given
 * the mempfn_tbl, mempfn_num and pindex members of the mempool as outputs.
 * NOTE(review): the target of the map_gntnode() result is not visible in
 * this listing; the log line below prints mempool->hva, so it is presumably
 * stored there -- confirm.
 */
418 map_mempoolnode(struct xen_gntnode *gntnode,
419 struct xen_mempool *mempool)
/* Both arguments are required. */
421 if (gntnode == NULL || mempool == NULL)
425 map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex);
427 RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva);
/*
 * Undo map_mempoolnode(): unmap the pool pages, release every grant index,
 * and free and reset the bookkeeping arrays.
 */
436 cleanup_mempool(struct xen_mempool *mempool)
438 int pg_sz = getpagesize();
/* Unmap the pages before releasing the grants: munmap must precede the gntdev unmap. */
442 munmap(mempool->hva, mempool->mempfn_num * pg_sz);
445 if (mempool->pindex) {
446 RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n",
450 mempool->mempfn_num);
/* One gntdev index was recorded per mapped page. */
451 for (i = 0; i < mempool->mempfn_num; i ++) {
452 xen_unmap_grant_ref(mempool->pindex[i]);
/* Pointers are reset to NULL so a repeated cleanup does not reuse them. */
455 mempool->pindex = NULL;
457 free(mempool->mempfn_tbl);
458 mempool->mempfn_tbl = NULL;
462 * process mempool node idx#_mempool_gref, idx = 0, 1, 2...
463 * until we encounter a node that doesn't exist.
/*
 * Discover and map the mempools of a guest: for each existing
 * idx#_mempool_gref node, parse the grant node, read the guest virtual
 * address of the pool, and map the pool pages.
 * NOTE(review): the loop construct, the pool_num updates and the return
 * statements are not visible in this listing.
 */
466 parse_mempoolnode(struct xen_guest *guest)
469 char path[PATH_MAX] = {0};
470 struct xen_gntnode *gntnode = NULL;
471 struct xen_mempool *mempool = NULL;
/* Start from an all-zero mempool table. */
474 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
478 /* check if null terminated */
479 snprintf(path, sizeof(path),
480 XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
/* A successful read is used only as an existence probe for the node. */
484 if ((buf = xen_read_node(path, &len)) != NULL) {
485 /* this node exists */
/* Finding no pool at all is reported as an error. */
488 if (guest->pool_num == 0) {
489 RTE_LOG(ERR, PMD, "no mempool found\n");
/* Fill the next free slot; the pool index equals the current pool count. */
495 mempool = &guest->mempool[guest->pool_num];
496 mempool->dom_id = guest->dom_id;
497 mempool->pool_idx = guest->pool_num;
499 RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
500 gntnode = parse_gntnode(guest->dom_id, path);
504 if (parse_mpool_va(mempool))
507 RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
508 if (map_mempoolnode(gntnode, mempool))
/* The parsed grant node is released once the pool has been mapped. */
511 xen_free_gntnode(gntnode);
/* Teardown (presumably the error path): release the node, clean up every pool slot, and zero the table again. */
518 xen_free_gntnode(gntnode);
519 for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) {
520 cleanup_mempool(&guest->mempool[i]);
522 /* reinitialise mempool */
523 bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
/*
 * Map the flag page of a vring: read its grant reference from the xenstore
 * node built with XEN_VRINGFLAG_SUFFIX, then map that page and keep the
 * address in vring->flag and the gntdev index in vring->flag_index.
 */
528 xen_map_vringflag(struct xen_vring *vring)
530 char path[PATH_MAX] = {0};
533 int pg_sz = getpagesize();
/* Build the key from the domain id and the virtio index. */
536 snprintf(path, sizeof(path),
537 XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX,
538 vring->dom_id, vring->virtio_idx);
540 if((buf = xen_read_node(path, &len)) == NULL)
/* Base auto-detected; unlike parse_mpool_va(), trailing characters after the number are accepted here. */
544 gref = strtol(buf, &end, 0);
545 if (errno != 0 || end == NULL || end == buf) {
/* The literal 0 address acts as NULL, so the kernel chooses where to map the page. */
548 vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index);
/* A mapped page whose first value reads 0 is handled like a failed mapping. NOTE(review): the meaning of the flag value is not visible here. */
549 if (vring->flag == NULL || *vring->flag == 0)
/* Teardown (presumably the error path): unmap the page, then release its grant. */
557 munmap(vring->flag, pg_sz);
559 xen_unmap_grant_ref(vring->flag_index);
/*
 * Map the RX vring pages with map_gntnode() and keep the address, pfn table,
 * page count and gntdev index table in the rx members of the vring.
 * NOTE(review): the return statements are not visible in this listing;
 * parse_vringnode() treats a non-zero result as failure.
 */
566 xen_map_rxvringnode(struct xen_gntnode *gntnode,
567 struct xen_vring *vring)
569 vring->rxvring_addr =
570 map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex);
/* The address is logged before it is checked, so a failed mapping is logged too. */
571 RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr);
572 if (vring->rxvring_addr)
/*
 * Map the TX vring pages with map_gntnode() and keep the address, pfn table,
 * page count and gntdev index table in the tx members of the vring.
 * NOTE(review): the return statements are not visible in this listing;
 * parse_vringnode() treats a non-zero result as failure.
 */
579 xen_map_txvringnode(struct xen_gntnode *gntnode,
580 struct xen_vring *vring)
582 vring->txvring_addr =
583 map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex);
/* The address is logged before it is checked, so a failed mapping is logged too. */
584 RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr);
585 if (vring->txvring_addr)
/*
 * Release everything a vring holds: the RX and TX page mappings, their
 * grant indexes and pfn tables, and finally the flag page and its grant.
 * For each resource the pages are unmapped before the grants are released.
 */
592 cleanup_vring(struct xen_vring *vring)
594 int pg_sz = getpagesize();
597 RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx);
/* RX ring: unmap the pages only if the ring was mapped. */
598 if (vring->rxvring_addr) {
599 munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz);
600 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n",
603 RTE_PTR_ADD(vring->rxvring_addr,
604 vring->rxpfn_num * pg_sz - 1));
606 vring->rxvring_addr = NULL;
/* RX ring: release one gntdev index per page. */
609 if (vring->rx_pindex) {
610 RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num);
611 for (i = 0; i < vring->rxpfn_num; i++) {
612 xen_unmap_grant_ref(vring->rx_pindex[i]);
615 vring->rx_pindex = NULL;
617 free(vring->rxpfn_tbl);
618 vring->rxpfn_tbl = NULL;
/* TX ring: same sequence as for the RX ring. */
620 if (vring->txvring_addr) {
621 munmap(vring->txvring_addr, vring->txpfn_num * pg_sz);
622 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n",
625 RTE_PTR_ADD(vring->txvring_addr,
626 vring->txpfn_num * pg_sz - 1));
628 vring->txvring_addr = NULL;
630 if (vring->tx_pindex) {
631 RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num);
632 for (i = 0; i < vring->txpfn_num; i++) {
633 xen_unmap_grant_ref(vring->tx_pindex[i]);
636 vring->tx_pindex = NULL;
638 free(vring->txpfn_tbl);
639 vring->txpfn_tbl = NULL;
/* Flag page: each step is logged only when it succeeds (both calls return 0 on success). NOTE(review): whether this part is guarded by a check on vring->flag is not visible in this listing. */
642 if (!munmap((void *)vring->flag, pg_sz))
643 RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag);
644 if (!xen_unmap_grant_ref(vring->flag_index))
645 RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index);
/*
 * Read the Ethernet address of a vring from the xenstore node built with
 * XEN_ADDR_SUFFIX and parse it into vring->addr.
 * NOTE(review): the return statements are not visible in this listing;
 * parse_vringnode() treats a non-zero result as failure.
 */
654 xen_parse_etheraddr(struct xen_vring *vring)
656 char path[PATH_MAX] = {0};
/* Build the key from the domain id and the virtio index. */
661 snprintf(path, sizeof(path),
662 XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX,
663 vring->dom_id, vring->virtio_idx);
665 if ((buf = xen_read_node(path, &len)) == NULL)
/* The parser takes no token object here (NULL); a negative result means the text could not be parsed. */
668 if (cmdline_parse_etheraddr(NULL, buf, &vring->addr,
669 sizeof(vring->addr)) < 0)
/*
 * Set up one virtio device of a guest: parse its RX and TX grant nodes,
 * initialise guest->vring[virtio_idx], read its Ethernet address, and map
 * the RX ring, the TX ring and the flag page.
 * NOTE(review): no bounds check on virtio_idx is visible in this listing;
 * the labels and return statements are not visible either.
 */
679 parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
681 char path[PATH_MAX] = {0};
682 struct xen_gntnode *rx_gntnode = NULL;
683 struct xen_gntnode *tx_gntnode = NULL;
684 struct xen_vring *vring = NULL;
686 /*check if null terminated */
/* Build the RX grant node path and parse it. */
687 snprintf(path, sizeof(path),
688 XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
692 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
693 rx_gntnode = parse_gntnode(guest->dom_id, path);
694 if (rx_gntnode == NULL)
697 /*check if null terminated */
/* Reuse the same buffer for the TX grant node path; the RX path is overwritten. */
698 snprintf(path, sizeof(path),
699 XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
703 RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
704 tx_gntnode = parse_gntnode(guest->dom_id, path);
705 if (tx_gntnode == NULL)
/* Reset the vring slot and record which domain and device it belongs to. */
708 vring = &guest->vring[virtio_idx];
709 bzero(vring, sizeof(*vring));
710 vring->dom_id = guest->dom_id;
711 vring->virtio_idx = virtio_idx;
713 if (xen_parse_etheraddr(vring) != 0)
/* NOTE(review): path holds the TX node path at this point, so this rx message prints the TX path. */
716 RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
717 if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
720 RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
721 if (xen_map_txvringnode(tx_gntnode, vring) != 0)
724 if (xen_map_vringflag(vring) != 0)
/* The parsed grant nodes are only needed while mapping and are released afterwards. */
729 xen_free_gntnode(rx_gntnode);
730 xen_free_gntnode(tx_gntnode);
/* Second release sequence (presumably the error path): free the nodes, undo the mappings, and zero the vring slot. */
736 xen_free_gntnode(rx_gntnode);
738 xen_free_gntnode(tx_gntnode);
740 cleanup_vring(vring);
741 bzero(vring, sizeof(*vring));
747 * Open xen grant dev driver
749 * 0 on success, -1 on failure.
/*
 * NOTE(review): the function header is not visible in this listing; the
 * call on the last visible line of the file suggests this is the body of
 * xen_grant_init() -- confirm.
 * Opens the grant device read-write and keeps the descriptor in the
 * file-level d_fd that the gntdev ioctl() and mmap() calls use.
 */
754 d_fd = open(XEN_GNTDEV_FNAME, O_RDWR);
/* open() reports failure with -1; that maps to -1 here and any valid descriptor maps to 0. */
756 return d_fd == -1? (-1): (0);
760 * Initialise xenstore handle and open grant dev driver.
762 * 0 on success, -1 on failure.
767 xs = xs_daemon_open();
769 rte_panic("failed initialize xen daemon handler");
772 if (xen_grant_init())