2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
7 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 * The full GNU General Public License is included in this distribution
22 * in the file called LICENSE.GPL.
24 * Contact Information:
29 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
30 * All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
36 * * Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * * Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in
40 * the documentation and/or other materials provided with the
42 * * Neither the name of Intel Corporation nor the names of its
43 * contributors may be used to endorse or promote products derived
44 * from this software without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
49 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
50 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
52 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
56 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60 #include <linux/module.h>
61 #include <linux/miscdevice.h>
63 #include <linux/device.h>
64 #include <linux/errno.h>
65 #include <linux/vmalloc.h>
67 #include <linux/version.h>
71 #include <xen/xen-ops.h>
72 #include <xen/interface/memory.h>
74 #include <rte_config.h>
75 #include <exec-env/rte_dom0_common.h>
78 #include "dom0_mm_dev.h"
/* Module metadata: licence, author and description strings. */
80 MODULE_LICENSE("Dual BSD/GPL");
81 MODULE_AUTHOR("Intel Corporation");
82 MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
/*
 * Driver-wide state. This file uses its miscdev, data_lock, mm_data[] table
 * and the used_memsize / config_memsize / num_mem_ctx / num_bigblock /
 * fail_times counters.
 */
84 static struct dom0_mm_dev dom0_dev;
/* Handle of the "dom0-mm" sysfs kobject created during module init. */
85 static struct kobject *dom0_kobj = NULL;
/* Table of reserved memory blocks; vmalloc'ed by dom0_memory_reserve(). */
87 static struct memblock_info *rsv_mm_info;
89 /* Default configuration for reserved memory size(2048 MB). */
90 static uint32_t rsv_memsize = 2048;
/*
 * Forward declarations: the file_operations callbacks referenced by
 * data_fops and the memory helpers that are called before their definitions.
 */
92 static int dom0_open(struct inode *inode, struct file *file);
93 static int dom0_release(struct inode *inode, struct file *file);
94 static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
95 unsigned long ioctl_param);
96 static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
97 static int dom0_memory_free(uint32_t size);
98 static int dom0_memory_release(struct dom0_mm_data *mm_data);
/*
 * File operations installed on the misc device (see miscdev.fops in module
 * init).
 * NOTE(review): dom0_ioctl() is declared as returning int; the (void *) cast
 * presumably hides the mismatch with the unlocked_ioctl prototype - confirm.
 */
100 static const struct file_operations data_fops = {
101 .owner = THIS_MODULE,
103 .release = dom0_release,
105 .unlocked_ioctl = (void *)dom0_ioctl,
109 show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
111 return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
115 show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
117 return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
/*
 * sysfs "store" callback behind the memsize attribute.
 *
 * Parses buf as an unsigned long (base auto-detected by kstrtoul) and, under
 * dom0_dev.data_lock, stores it in dom0_dev.config_memsize. A value larger
 * than the memory still unused (rsv_memsize - used_memsize) is rejected and
 * logged.
 *
 * Returns count on success, otherwise the error held in err.
 */
121 store_memsize(struct device *dev, struct device_attribute *attr,
122 const char *buf, size_t count)
125 unsigned long mem_size;
/* Reject input that is not a valid number. */
127 if (0 != kstrtoul(buf, 0, &mem_size))
130 mutex_lock(&dom0_dev.data_lock);
/* The request must fit into what has not been handed out yet. */
134 } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
135 XEN_ERR("configure memory size fail\n");
139 dom0_dev.config_memsize = mem_size;
142 mutex_unlock(&dom0_dev.data_lock);
/* sysfs expects the number of consumed bytes on success. */
143 return err ? err : count;
/*
 * sysfs attributes:
 *  - memsize:      world-readable, owner-writable; backed by show_memsize()
 *                  and store_memsize().
 *  - memsize_rsvd: read-only; backed by show_memsize_rsvd().
 */
146 static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
147 static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
/* Attribute list referenced by the group below. */
149 static struct attribute *dev_attrs[] = {
150 &dev_attr_memsize.attr,
151 &dev_attr_memsize_rsvd.attr,
155 /* the memory size unit is MB */
/* Group registered on dom0_kobj; its files appear in a "memsize-mB" directory. */
156 static const struct attribute_group dev_attr_grp = {
157 .name = "memsize-mB",
/*
 * Order the first cnt entries of mb by ascending pfn.
 *
 * Neighbouring entries are compared from the end of the array towards
 * position i; when a pair is out of order its pfn and vir_addr values are
 * exchanged, so the two fields stay paired. Only these two fields are
 * visible being moved here.
 *
 * @mb:  array of memory block descriptors.
 * @cnt: number of entries in mb.
 */
163 sort_viraddr(struct memblock_info *mb, int cnt)
167 uint64_t tmp_viraddr;
169 /*sort virtual address and pfn */
170 for(i = 0; i < cnt; i ++) {
171 for(j = cnt - 1; j > i; j--) {
172 if(mb[j].pfn < mb[j - 1].pfn) {
173 tmp_pfn = mb[j - 1].pfn;
174 mb[j - 1].pfn = mb[j].pfn;
177 tmp_viraddr = mb[j - 1].vir_addr;
178 mb[j - 1].vir_addr = mb[j].vir_addr;
179 mb[j].vir_addr = tmp_viraddr;
/*
 * Look for a registered memory context by name.
 *
 * Walks the NUM_MEM_CTX slots of dom0_dev.mm_data[], testing each slot for
 * NULL before comparing its name with mem_name over at most DOM0_NAME_MAX
 * bytes.
 * Callers use the result as an index into dom0_dev.mm_data[] and treat a
 * negative value as "not found".
 */
186 dom0_find_memdata(const char * mem_name)
190 for(i = 0; i< NUM_MEM_CTX; i++) {
191 if(dom0_dev.mm_data[i] == NULL)
193 if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
194 sizeof(char) * DOM0_NAME_MAX)) {
/*
 * Look for an unused (NULL) slot among the NUM_MEM_CTX entries of
 * dom0_dev.mm_data[].
 * dom0_prepare_memsegs() uses the result as the slot index for a new context.
 */
204 dom0_find_mempos(void)
209 for(i = 0; i< NUM_MEM_CTX; i++) {
210 if(dom0_dev.mm_data[i] == NULL){
/*
 * Give the blocks of one memory context back to the reserved pool.
 *
 * Unregisters the context from dom0_dev (used size, slot, context count),
 * marks every block listed in mm_data->block_num[] as unused in rsv_mm_info,
 * and finally zeroes the context structure.
 * dom0_release() calls this with dom0_dev.data_lock held.
 *
 * @mm_data: context to release.
 */
220 dom0_memory_release(struct dom0_mm_data *mm_data)
223 uint32_t num_block, block_id;
225 /* each memory block is 2M */
226 num_block = mm_data->mem_size / SIZE_PER_BLOCK;
230 /* reset global memory data */
/* The slot is located by name, not by a stored index. */
231 idx = dom0_find_memdata(mm_data->name);
233 dom0_dev.used_memsize -= mm_data->mem_size;
234 dom0_dev.mm_data[idx] = NULL;
235 dom0_dev.num_mem_ctx--;
238 /* reset these memory blocks status as free */
239 for (idx = 0; idx < num_block; idx++) {
240 block_id = mm_data->block_num[idx];
241 rsv_mm_info[block_id].used = 0;
/* Wipe the context so no stale block or segment data remains in it. */
244 memset(mm_data, 0, sizeof(struct dom0_mm_data));
/*
 * Return reserved memory to the kernel.
 *
 * The first dom0_dev.num_bigblock * 2 table entries were allocated in pairs
 * (one allocation of order MAX_NUM_ORDER covering two blocks); the remaining
 * entries up to num_block were allocated one block at a time with order
 * DOM0_CONTIG_NUM_ORDER. For every block whose exchange_flag is set the Xen
 * contiguous region is destroyed first; pages then have their reserved bit
 * cleared and are released with free_pages(). The table is zeroed at the end.
 *
 * @rsv_size: size to free; divided by SIZE_PER_BLOCK to get the block count.
 */
250 dom0_memory_free(uint32_t rsv_size)
252 uint64_t vstart, vaddr;
253 uint32_t i, num_block, size;
/* Only meaningful in a Xen PV domain. */
255 if (!xen_pv_domain())
258 /* each memory block is 2M */
259 num_block = rsv_size / SIZE_PER_BLOCK;
263 /* free all memory blocks of size of 4M and destroy contiguous region */
264 for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
265 vstart = rsv_mm_info[i].vir_addr;
/*
 * Before kernel 3.13 xen_destroy_contiguous_region() is given a virtual
 * address; the other variant below passes pfn * PAGE_SIZE instead.
 */
267 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
268 if (rsv_mm_info[i].exchange_flag)
269 xen_destroy_contiguous_region(vstart,
270 DOM0_CONTIG_NUM_ORDER);
271 if (rsv_mm_info[i + 1].exchange_flag)
272 xen_destroy_contiguous_region(vstart +
274 DOM0_CONTIG_NUM_ORDER);
276 if (rsv_mm_info[i].exchange_flag)
277 xen_destroy_contiguous_region(rsv_mm_info[i].pfn
279 DOM0_CONTIG_NUM_ORDER);
280 if (rsv_mm_info[i + 1].exchange_flag)
281 xen_destroy_contiguous_region(rsv_mm_info[i].pfn
282 * PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
283 DOM0_CONTIG_NUM_ORDER);
/* A big allocation spans two blocks. */
286 size = DOM0_MEMBLOCK_SIZE * 2;
289 ClearPageReserved(virt_to_page(vaddr));
293 free_pages(vstart, MAX_NUM_ORDER);
297 /* free all memory blocks size of 2M and destroy contiguous region */
/* i continues from where the paired loop stopped. */
298 for (; i < num_block; i++) {
299 vstart = rsv_mm_info[i].vir_addr;
301 if (rsv_mm_info[i].exchange_flag)
302 xen_destroy_contiguous_region(vstart,
303 DOM0_CONTIG_NUM_ORDER);
305 size = DOM0_MEMBLOCK_SIZE;
308 ClearPageReserved(virt_to_page(vaddr));
312 free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
316 memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
/*
 * Claim up to count unused blocks from the reserved pool for mm_data.
 *
 * Walks rsv_mm_info (rsv_memsize / SIZE_PER_BLOCK entries). Every entry whose
 * used flag is 0 is copied into mm_data->block_info[i] (pfn, vir_addr, mfn,
 * exchange_flag), its pool index is remembered in mm_data->block_num[i], and
 * the pool entry is marked used. The walk ends once count blocks were taken
 * or the pool is exhausted.
 *
 * @count:   number of blocks wanted.
 * @mm_data: context receiving the blocks.
 */
324 find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
329 while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
330 if (rsv_mm_info[j].used == 0) {
331 mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
332 mm_data->block_info[i].vir_addr =
333 rsv_mm_info[j].vir_addr;
334 mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
335 mm_data->block_info[i].exchange_flag =
336 rsv_mm_info[j].exchange_flag;
/* Remember the pool index so dom0_memory_release() can free it later. */
337 mm_data->block_num[i] = j;
338 rsv_mm_info[j].used = 1;
/*
 * Group the blocks of mm_data into memory segments.
 *
 * A segment starts at a block whose exchange_flag is set; its pfn becomes
 * the segment pfn and its mfn the first entry of the segment's mfn[] list.
 * Following blocks are examined in order: a block with exchange_flag == 0
 * or whose pfn is not exactly DOM0_MEMBLOCK_SIZE / PAGE_SIZE above its
 * predecessor is not appended. The segment size is the number of collected
 * blocks times DOM0_MEMBLOCK_SIZE. The number of segments found is stored in
 * mm_data->num_memseg; DOM0_NUM_MEMSEG is tested as the upper limit.
 *
 * @count:   number of valid entries in mm_data->block_info[].
 * @mm_data: context whose seg_info[] and num_memseg are filled in.
 */
346 * Find all memory segments in which physical addresses are contiguous.
349 find_memseg(int count, struct dom0_mm_data * mm_data)
353 uint64_t zone_len, pfn, num_block;
356 if (mm_data->block_info[i].exchange_flag == 0) {
361 pfn = mm_data->block_info[i].pfn;
362 mm_data->seg_info[idx].pfn = pfn;
363 mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
365 for (j = i + 1; j < count; j++) {
367 /* ignore exchange fail memory block */
368 if (mm_data->block_info[j].exchange_flag == 0)
/* The next block must start exactly one block of pages after the previous one. */
371 if (mm_data->block_info[j].pfn !=
372 (mm_data->block_info[j - 1].pfn +
373 DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
376 mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
380 zone_len = num_block * DOM0_MEMBLOCK_SIZE;
381 mm_data->seg_info[idx].size = zone_len;
383 XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
386 if (idx == DOM0_NUM_MEMSEG)
389 mm_data->num_memseg = idx;
/*
 * Reserve rsv_size worth of memory blocks and make each one contiguous in
 * machine memory.
 *
 * Steps visible here:
 *  1. vmalloc and zero the rsv_mm_info table (one entry per block).
 *  2. Allocate with order MAX_NUM_ORDER, filling two table entries per
 *     allocation, and record the number of such pairs in
 *     dom0_dev.num_bigblock.
 *  3. Allocate the remaining entries one block at a time with order
 *     DOM0_CONTIG_NUM_ORDER; a failure here frees what was allocated so far.
 *  4. Mark the pages reserved and record pfn / virtual address per block.
 *  5. Sort the table by pfn.
 *  6. Call xen_create_contiguous_region() for every block and record the
 *     outcome in exchange_flag; after more than MAX_EXCHANGE_FAIL_TIME
 *     failures everything is freed again.
 *
 * @rsv_size: amount to reserve; divided by SIZE_PER_BLOCK to get the block
 *            count.
 *
 * Module init treats a negative return value as failure.
 */
393 dom0_memory_reserve(uint32_t rsv_size)
395 uint64_t pfn, vstart, vaddr;
396 uint32_t i, num_block, size, allocated_size = 0;
/* The 3.13+ variant of xen_create_contiguous_region() returns a DMA handle. */
398 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
399 dma_addr_t dma_handle;
402 /* 2M as memory block */
403 num_block = rsv_size / SIZE_PER_BLOCK;
405 rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
407 XEN_ERR("Unable to allocate device memory information\n");
410 memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
412 /* try alloc size of 4M once */
/*
 * NOTE(review): this loop writes rsv_mm_info[i + 1]; if num_block is odd the
 * last pass indexes one entry past the table - confirm callers never pass a
 * size that yields an odd block count.
 */
413 for (i = 0; i < num_block; i += 2) {
414 vstart = (unsigned long)
415 __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
419 dom0_dev.num_bigblock = i / 2 + 1;
420 allocated_size = SIZE_PER_BLOCK * (i + 2);
423 size = DOM0_MEMBLOCK_SIZE * 2;
427 SetPageReserved(virt_to_page(vaddr));
/* One big allocation provides two consecutive table entries. */
432 pfn = virt_to_pfn(vstart);
433 rsv_mm_info[i].pfn = pfn;
434 rsv_mm_info[i].vir_addr = vstart;
435 rsv_mm_info[i + 1].pfn =
436 pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
437 rsv_mm_info[i + 1].vir_addr =
438 vstart + DOM0_MEMBLOCK_SIZE;
441 /*if it failed to alloc 4M, and continue to alloc 2M once */
442 for (; i < num_block; i++) {
443 vstart = (unsigned long)
444 __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
446 XEN_ERR("allocate memory fail.\n");
/* Roll back only what has been allocated up to this point. */
447 dom0_memory_free(allocated_size);
451 allocated_size += SIZE_PER_BLOCK;
453 size = DOM0_MEMBLOCK_SIZE;
456 SetPageReserved(virt_to_page(vaddr));
460 pfn = virt_to_pfn(vstart);
461 rsv_mm_info[i].pfn = pfn;
462 rsv_mm_info[i].vir_addr = vstart;
/* Order the blocks by pfn before the exchange pass. */
465 sort_viraddr(rsv_mm_info, num_block);
467 for (i = 0; i< num_block; i++) {
470 * This API is used to exchage MFN for getting a block of
471 * contiguous physical addresses, its maximum size is 2M.
473 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
474 if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
475 DOM0_CONTIG_NUM_ORDER, 0) == 0) {
477 if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
478 DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
/* Exchange succeeded: the block is usable. */
480 rsv_mm_info[i].exchange_flag = 1;
482 pfn_to_mfn(rsv_mm_info[i].pfn);
483 rsv_mm_info[i].used = 0;
485 XEN_ERR("exchange memeory fail\n");
486 rsv_mm_info[i].exchange_flag = 0;
/* Too many failed exchanges: give everything back. */
487 dom0_dev.fail_times++;
488 if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
489 dom0_memory_free(rsv_size);
/*
 * Build a new memory context from a user request and register it.
 *
 * Copies the requested name into mm_data (always NUL-terminated), picks a
 * free slot in dom0_dev.mm_data[], claims meminfo->size / SIZE_PER_BLOCK
 * blocks, groups them into segments, and finally publishes the context and
 * updates the global counters.
 * dom0_ioctl() calls this with dom0_dev.data_lock held and treats a negative
 * return value as failure.
 *
 * @meminfo: request (name and size) copied from user space.
 * @mm_data: zeroed context to fill in.
 */
499 dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
504 /* check if there is a free name buffer */
505 memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
/* The user-supplied name may lack a terminator. */
506 mm_data->name[DOM0_NAME_MAX - 1] = '\0';
507 idx = dom0_find_mempos();
511 num_block = meminfo->size / SIZE_PER_BLOCK;
512 /* find free memory and new memory segments*/
513 find_free_memory(num_block, mm_data);
514 find_memseg(num_block, mm_data);
516 /* update private memory data */
518 mm_data->mem_size = meminfo->size;
520 /* update global memory data */
521 dom0_dev.mm_data[idx] = mm_data;
522 dom0_dev.num_mem_ctx++;
523 dom0_dev.used_memsize += mm_data->mem_size;
529 dom0_check_memory (struct memory_info *meminfo)
534 /* round memory size to the next even number. */
535 if (meminfo->size % 2)
538 mem_size = meminfo->size;
539 if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
540 XEN_ERR("Memory data space is full in Dom0 driver\n");
543 idx = dom0_find_memdata(meminfo->name);
545 XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
549 if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
550 XEN_ERR("Total size can't be larger than reserved size.\n");
/*
 * Module initialisation body (registered through module_init(dom0_init)).
 *
 * Order of operations visible here: validate rsv_memsize, register the
 * "dom0_mm" misc device, initialise data_lock, create the "dom0-mm" kobject
 * under mm_kobj with its attribute group, then reserve the memory. Each
 * failure path undoes the steps that were already completed.
 *
 * NOTE(review): misc_register() runs before mutex_init() and before
 * dom0_memory_reserve(), so the device node can presumably be opened while
 * the lock and rsv_mm_info are not yet set up - confirm whether this
 * ordering is intended.
 */
563 if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
564 XEN_ERR("The reserved memory size cannot be greater than %d\n",
565 DOM0_CONFIG_MEMSIZE);
569 /* Setup the misc device */
570 dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
571 dom0_dev.miscdev.name = "dom0_mm";
572 dom0_dev.miscdev.fops = &data_fops;
574 /* register misc char device */
575 if (misc_register(&dom0_dev.miscdev) != 0) {
576 XEN_ERR("Misc device registration failed\n");
580 mutex_init(&dom0_dev.data_lock);
581 dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
584 XEN_ERR("dom0-mm object creation failed\n");
585 misc_deregister(&dom0_dev.miscdev);
/* Attribute group failed: drop the kobject and the device again. */
589 if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
590 kobject_put(dom0_kobj);
591 misc_deregister(&dom0_dev.miscdev);
/* Reservation failed: unwind sysfs and the device in reverse order. */
595 if (dom0_memory_reserve(rsv_memsize) < 0) {
596 sysfs_remove_group(dom0_kobj, &dev_attr_grp);
597 kobject_put(dom0_kobj);
598 misc_deregister(&dom0_dev.miscdev);
602 XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
/*
 * Module exit body (registered through module_exit(dom0_exit)).
 *
 * Frees the reserved memory when the block table exists, then removes the
 * sysfs group, drops the kobject and deregisters the misc device.
 */
610 if (rsv_mm_info != NULL)
611 dom0_memory_free(rsv_memsize);
613 sysfs_remove_group(dom0_kobj, &dev_attr_grp);
614 kobject_put(dom0_kobj);
615 misc_deregister(&dom0_dev.miscdev);
617 XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
/*
 * open() handler: a freshly opened file starts without a memory context
 * (private_data is NULL until an ioctl attaches one).
 */
621 dom0_open(struct inode *inode, struct file *file)
623 file->private_data = NULL;
625 XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
/*
 * release() handler: drop this file's reference on its memory context.
 *
 * Under dom0_dev.data_lock the context's refcnt is decremented; when it
 * reaches zero the context is handed to dom0_memory_release(), whose result
 * is kept in ret. private_data is cleared afterwards.
 */
630 dom0_release(struct inode *inode, struct file *file)
633 struct dom0_mm_data *mm_data = file->private_data;
638 mutex_lock(&dom0_dev.data_lock);
/* The last user gives the blocks back to the pool. */
639 if (--mm_data->refcnt == 0)
640 ret = dom0_memory_release(mm_data);
641 mutex_unlock(&dom0_dev.data_lock);
643 file->private_data = NULL;
644 XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
/*
 * mmap() handler: map one memory segment of the file's context.
 *
 * The page offset of the mapping (vm_pgoff) is used as the segment index,
 * and the length of the VMA as the size to map. Under dom0_dev.data_lock
 * the index is checked against num_memseg and the size against the segment
 * size; the segment's starting pfn is then mapped with remap_pfn_range()
 * using PAGE_SHARED protection.
 *
 * @file: file whose private_data holds the memory context.
 * @vm:   VMA describing the requested mapping.
 */
649 dom0_mmap(struct file *file, struct vm_area_struct *vm)
/* The mmap offset selects a segment; it is not a byte offset. */
652 uint32_t idx = vm->vm_pgoff;
653 uint64_t pfn, size = vm->vm_end - vm->vm_start;
654 struct dom0_mm_data *mm_data = file->private_data;
659 mutex_lock(&dom0_dev.data_lock);
/* Unknown segment index. */
660 if (idx >= mm_data->num_memseg) {
661 mutex_unlock(&dom0_dev.data_lock);
/* A mapping may not be larger than the segment itself. */
665 if (size > mm_data->seg_info[idx].size){
666 mutex_unlock(&dom0_dev.data_lock);
670 XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
672 pfn = mm_data->seg_info[idx].pfn;
673 mutex_unlock(&dom0_dev.data_lock);
/* The remap itself runs without the lock held. */
675 status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
680 dom0_ioctl(struct file *file,
681 unsigned int ioctl_num,
682 unsigned long ioctl_param)
685 char name[DOM0_NAME_MAX] = {0};
686 struct memory_info meminfo;
687 struct dom0_mm_data *mm_data = file->private_data;
689 XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
692 * Switch according to the ioctl called
694 switch _IOC_NR(ioctl_num) {
695 case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
696 ret = copy_from_user(&meminfo, (void *)ioctl_param,
697 sizeof(struct memory_info));
701 if (mm_data != NULL) {
702 XEN_ERR("Cannot create memory segment for the same"
703 " file descriptor\n");
707 /* Allocate private data */
708 mm_data = vmalloc(sizeof(struct dom0_mm_data));
710 XEN_ERR("Unable to allocate device private data\n");
713 memset(mm_data, 0, sizeof(struct dom0_mm_data));
715 mutex_lock(&dom0_dev.data_lock);
716 /* check if we can allocate memory*/
717 if (dom0_check_memory(&meminfo) < 0) {
718 mutex_unlock(&dom0_dev.data_lock);
723 /* allocate memory and created memory segments*/
724 if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
725 XEN_ERR("create memory segment fail.\n");
726 mutex_unlock(&dom0_dev.data_lock);
730 file->private_data = mm_data;
731 mutex_unlock(&dom0_dev.data_lock);
734 /* support multiple process in term of memory mapping*/
735 case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
736 ret = copy_from_user(name, (void *)ioctl_param,
737 sizeof(char) * DOM0_NAME_MAX);
741 mutex_lock(&dom0_dev.data_lock);
742 idx = dom0_find_memdata(name);
744 mutex_unlock(&dom0_dev.data_lock);
748 mm_data = dom0_dev.mm_data[idx];
750 file->private_data = mm_data;
751 mutex_unlock(&dom0_dev.data_lock);
754 case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
755 ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
761 case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
762 ret = copy_to_user((void *)ioctl_param,
763 &mm_data->seg_info[0],
764 sizeof(struct memseg_info) *
765 mm_data->num_memseg);
770 XEN_PRINT("IOCTL default \n");
/* Module entry and exit points. */
777 module_init(dom0_init);
778 module_exit(dom0_exit);
/*
 * rsv_memsize is exposed as a module parameter (world-readable,
 * owner-writable); the unit is MB according to its description string.
 */
780 module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
781 MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");