2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
7 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 * The full GNU General Public License is included in this distribution
22 * in the file called LICENSE.GPL.
24 * Contact Information:
29 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
30 * All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
36 * * Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * * Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in
40 * the documentation and/or other materials provided with the
42 * * Neither the name of Intel Corporation nor the names of its
43 * contributors may be used to endorse or promote products derived
44 * from this software without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
49 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
50 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
52 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
56 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60 #include <linux/module.h>
61 #include <linux/miscdevice.h>
63 #include <linux/device.h>
64 #include <linux/errno.h>
65 #include <linux/vmalloc.h>
70 #include <xen/xen-ops.h>
71 #include <xen/interface/memory.h>
73 #include <rte_config.h>
74 #include <exec-env/rte_dom0_common.h>
76 #include "dom0_mm_dev.h"
/* Module metadata published through the kernel MODULE_* macros. */
78 MODULE_LICENSE("Dual BSD/GPL");
79 MODULE_AUTHOR("Intel Corporation");
80 MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
/*
 * Driver-wide state. The code in this file uses its data_lock mutex, the
 * mm_data[] slot table, the num_mem_ctx counter, the embedded miscdev and
 * the config_memsize / allocated_memsize counters.
 */
82 static struct dom0_mm_dev dom0_dev;
/* kobject created under mm_kobj during init; carries the sysfs attribute group. */
83 static struct kobject *dom0_kobj = NULL;
/* Forward declarations for the file_operations callbacks and the shared free helper. */
85 static int dom0_open(struct inode *inode, struct file *file);
86 static int dom0_release(struct inode *inode, struct file *file);
87 static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
88 unsigned long ioctl_param);
89 static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
90 static int dom0_memory_free(struct dom0_mm_data *mm_data);
/*
 * File operations for the misc character device; installed as
 * dom0_dev.miscdev.fops by the init code. Only part of the initializer is
 * visible in this excerpt.
 */
92 static const struct file_operations data_fops = {
95 .release = dom0_release,
/*
 * NOTE(review): dom0_ioctl is declared as returning int, and the (void *)
 * cast suppresses any prototype check against the unlocked_ioctl member.
 * Confirm the handler's signature matches what struct file_operations
 * expects and drop the cast if it does.
 */
97 .unlocked_ioctl = (void *)dom0_ioctl,
/*
 * sysfs "show" handler bound to the memsize_rsvd attribute: formats
 * dom0_dev.allocated_memsize as an unsigned decimal followed by a newline
 * into buf (output bounded to 10 bytes) and returns snprintf()'s result.
 * dev and attr are not used in the visible line.
 */
101 show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
103 return snprintf(buf, 10, "%u\n", dom0_dev.allocated_memsize);
/*
 * sysfs "show" handler bound to the memsize attribute: formats
 * dom0_dev.config_memsize as an unsigned decimal followed by a newline
 * into buf (output bounded to 10 bytes) and returns snprintf()'s result.
 * dev and attr are not used in the visible line.
 */
107 show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
109 return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
/*
 * sysfs "store" handler bound to the memsize attribute: parses buf as an
 * unsigned long and, while holding dom0_dev.data_lock, validates it and
 * records it in dom0_dev.config_memsize.
 *
 * Returns count when err is zero, otherwise err. Parts of the branch
 * structure (including where err is assigned) are outside this excerpt.
 */
113 store_memsize(struct device *dev, struct device_attribute *attr,
114 const char *buf, size_t count)
117 unsigned long mem_size;
/*
 * NOTE(review): strict_strtoul() is a legacy helper; newer kernels provide
 * kstrtoul() for the same job. Confirm which kernels this must build on.
 */
119 if (0 != strict_strtoul(buf, 0, &mem_size))
/* Hold data_lock while comparing against and updating the size fields. */
122 mutex_lock(&dom0_dev.data_lock);
/*
 * Reject a value smaller than what is already allocated or larger than
 * the DOM0_CONFIG_MEMSIZE limit.
 */
126 } else if (mem_size < dom0_dev.allocated_memsize ||
127 mem_size > DOM0_CONFIG_MEMSIZE) {
128 XEN_ERR("configure memory size fail\n");
/* Record the new configured size. */
132 dom0_dev.config_memsize = mem_size;
135 mutex_unlock(&dom0_dev.data_lock);
/* sysfs store convention used here: report the consumed byte count on success. */
136 return err ? err : count;
/*
 * sysfs attributes:
 *  - memsize:      readable by everyone, writable by the owner; backed by
 *                  show_memsize / store_memsize.
 *  - memsize_rsvd: read-only; backed by show_memsize_rsvd.
 */
139 static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
140 static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
/* Attribute list collected into the group below. */
142 static struct attribute *dev_attrs[] = {
143 &dev_attr_memsize.attr,
144 &dev_attr_memsize_rsvd.attr,
148 /* the memory size unit is MB */
/* Attribute group registered on dom0_kobj by the init code; named "memsize-mB". */
149 static const struct attribute_group dev_attr_grp = {
150 .name = "memsize-mB",
/*
 * Reorders the first cnt entries of mb so that their pfn values ascend,
 * moving each entry's vir_addr together with its pfn. It works by repeated
 * adjacent compare-and-swap passes that run from the tail of the array down
 * to index i.
 *
 * mb:  array of block descriptors to reorder in place.
 * cnt: number of entries in mb to consider.
 */
156 sort_viraddr(struct memblock_info *mb, int cnt)
160 uint64_t tmp_viraddr;
162 /*sort virtual address and pfn */
163 for(i = 0; i < cnt; i ++) {
164 for(j = cnt - 1; j > i; j--) {
/* Neighbours out of order: exchange their pfn and vir_addr fields. */
165 if(mb[j].pfn < mb[j - 1].pfn) {
166 tmp_pfn = mb[j - 1].pfn;
167 mb[j - 1].pfn = mb[j].pfn;
170 tmp_viraddr = mb[j - 1].vir_addr;
171 mb[j - 1].vir_addr = mb[j].vir_addr;
172 mb[j].vir_addr = tmp_viraddr;
/*
 * Scans the NUM_MEM_CTX slots of dom0_dev.mm_data for an entry whose name
 * matches mem_name, comparing at most DOM0_NAME_MAX bytes. Callers use the
 * result as an index into dom0_dev.mm_data; the return statements themselves
 * are outside this excerpt.
 */
179 dom0_find_memdata(const char * mem_name)
183 for(i = 0; i< NUM_MEM_CTX; i++) {
/* Empty slots have no name to compare. */
184 if(dom0_dev.mm_data[i] == NULL)
186 if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
187 sizeof(char) * DOM0_NAME_MAX)) {
/*
 * Scans the NUM_MEM_CTX slots of dom0_dev.mm_data, testing each for NULL.
 * dom0_prepare_memsegs() stores a new context at the returned index, so this
 * presumably yields the first unused slot - confirm against the full source.
 * NOTE(review): mem_name is not referenced in the visible lines.
 */
197 dom0_find_mempos(const char * mem_name)
202 for(i = 0; i< NUM_MEM_CTX; i++) {
203 if(dom0_dev.mm_data[i] == NULL){
/*
 * Releases the memory blocks described by mm_data and removes the context
 * from the driver-wide bookkeeping in dom0_dev.
 *
 * mm_data: context whose block_info[] entries are released; mem_size
 *          determines how many blocks are walked.
 *
 * The callers visible in this file invoke it with dom0_dev.data_lock held
 * (dom0_release) or from a path that runs under that lock
 * (dom0_prepare_memsegs via the ioctl handler).
 */
213 dom0_memory_free(struct dom0_mm_data * mm_data)
216 uint64_t vstart, vaddr;
217 uint32_t i, num_block, size;
/* Only meaningful in a Xen PV domain; the handling of the other case is outside this excerpt. */
219 if (!xen_pv_domain())
222 /* each memory block is 2M */
223 num_block = mm_data->mem_size / 2;
227 /* free memory and destroy contiguous region in Xen*/
228 for (i = 0; i< num_block; i++) {
229 vstart = mm_data->block_info[i].vir_addr;
/* Undo the contiguous-region exchange only for blocks where it succeeded. */
231 if (mm_data->block_info[i].exchange_flag)
232 xen_destroy_contiguous_region(vstart,
233 DOM0_CONTIG_NUM_ORDER);
235 size = DOM0_MEMBLOCK_SIZE;
/* Clear the reserved flag that dom0_prepare_memsegs() set on the block's pages. */
238 ClearPageReserved(virt_to_page(vaddr));
/* Return the block to the page allocator. */
242 free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
246 /* reset global memory data */
247 idx = dom0_find_memdata(mm_data->name);
/*
 * NOTE(review): any guard on idx sits on a line outside this excerpt;
 * confirm these updates are skipped when the name is not registered.
 */
249 dom0_dev.allocated_memsize -= mm_data->mem_size;
250 dom0_dev.mm_data[idx] = NULL;
251 dom0_dev.num_mem_ctx--;
/* Wipe the context so stale block information cannot be reused. */
253 memset(mm_data, 0, sizeof(struct dom0_mm_data));
260 * Find all memory segments in which physical addresses are contiguous.
263 find_memseg(int count, struct dom0_mm_data * mm_data)
267 uint64_t zone_len, pfn, num_block;
/*
 * count:   number of entries of mm_data->block_info[] to examine.
 * mm_data: context whose seg_info[] and num_memseg are filled in here.
 *
 * Blocks are expected in ascending pfn order; dom0_prepare_memsegs() calls
 * sort_viraddr() before calling this function.
 */
/* A block whose exchange failed cannot start a segment. */
270 if (mm_data->block_info[i].exchange_flag == 0) {
/* Open segment idx at block i: record its first pfn and first mfn. */
275 pfn = mm_data->block_info[i].pfn;
276 mm_data->seg_info[idx].pfn = pfn;
277 mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
/* Extend the segment over the following blocks while they stay adjacent. */
279 for (j = i + 1; j < count; j++) {
281 /* ignore exchange fail memory block */
282 if (mm_data->block_info[j].exchange_flag == 0)
/* Adjacent means the pfn advances by exactly one block's worth of pages. */
285 if (mm_data->block_info[j].pfn !=
286 (mm_data->block_info[j - 1].pfn +
287 DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
290 mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
/* Segment size is the number of member blocks times the block size. */
294 zone_len = num_block * DOM0_MEMBLOCK_SIZE;
295 mm_data->seg_info[idx].size = zone_len;
297 XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
/* The seg_info[] capacity is DOM0_NUM_MEMSEG entries. */
300 if (idx == DOM0_NUM_MEMSEG)
/* Publish how many segments were built. */
303 mm_data->num_memseg = idx;
/*
 * Allocates meminfo->size worth of 2M blocks, asks Xen to make each block
 * machine-contiguous, groups the blocks into segments with find_memseg()
 * and registers mm_data in dom0_dev.
 *
 * meminfo: requested size and name for the new memory context.
 * mm_data: caller-allocated context that is filled in here.
 *
 * On the failure paths visible here the partially built context is handed
 * to dom0_memory_free(). The ioctl handler treats a negative return value
 * as failure; the return statements are outside this excerpt.
 */
307 dom0_prepare_memsegs(struct memory_info* meminfo, struct dom0_mm_data *mm_data)
309 uint64_t pfn, vstart, vaddr;
310 uint32_t i, num_block, size;
313 /* Allocate 2M memory once */
314 num_block = meminfo->size / 2;
/* Pass 1: allocate every block and record its pfn and virtual address. */
316 for (i = 0; i< num_block; i++) {
317 vstart = (unsigned long)
318 __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
320 XEN_ERR("allocate memory fail.\n");
/* Only i blocks exist so far; set mem_size so the free helper walks exactly those. */
321 mm_data->mem_size = 2 * i;
322 dom0_memory_free(mm_data);
326 size = DOM0_MEMBLOCK_SIZE;
/* Mark the block's pages reserved; dom0_memory_free() clears the flag again. */
329 SetPageReserved(virt_to_page(vaddr));
333 pfn = virt_to_pfn(vstart);
334 mm_data->block_info[i].pfn = pfn;
335 mm_data->block_info[i].vir_addr = vstart;
/* Order the blocks by pfn so find_memseg() can detect adjacent runs. */
338 sort_viraddr(mm_data->block_info, num_block);
340 for (i = 0; i< num_block; i++) {
343 * This API is used to exchage MFN for getting a block of
344 * contiguous physical addresses, its maximum size is 2M.
346 if (xen_create_contiguous_region(mm_data->block_info[i].vir_addr,
347 DOM0_CONTIG_NUM_ORDER, 0) == 0) {
/* Exchange succeeded: remember it and record the block's machine frame number. */
348 mm_data->block_info[i].exchange_flag = 1;
349 mm_data->block_info[i].mfn =
350 pfn_to_mfn(mm_data->block_info[i].pfn);
352 XEN_ERR("exchange memeory fail\n");
353 mm_data->block_info[i].exchange_flag = 0;
354 mm_data->fail_times++;
/* Too many failed exchanges: give up and release all blocks. */
355 if (mm_data->fail_times > MAX_EXCHANGE_FAIL_TIME) {
356 mm_data->mem_size = meminfo->size;
357 dom0_memory_free(mm_data);
/* Group the exchanged blocks into contiguous segments. */
363 find_memseg(num_block, mm_data);
365 /* update private memory data */
367 mm_data->mem_size = meminfo->size;
/* Copy the name and force termination in case the source filled the whole buffer. */
368 memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
369 mm_data->name[DOM0_NAME_MAX -1] = '\0';
371 /* update global memory data */
372 idx = dom0_find_mempos(meminfo->name);
374 dom0_memory_free(mm_data);
/* Register the context in its slot and account for it in the global counters. */
378 dom0_dev.mm_data[idx] = mm_data;
379 dom0_dev.num_mem_ctx++;
380 dom0_dev.allocated_memsize += mm_data->mem_size;
/*
 * Validates a memory request before anything is allocated: checks the
 * number of contexts, whether the name is already registered, and whether
 * the request fits in the configured size.
 *
 * meminfo: request to validate; the visible code tests its size for oddness
 *          (the adjustment itself is outside this excerpt).
 *
 * The ioctl handler treats a negative return value as rejection.
 */
386 dom0_check_memory (struct memory_info *meminfo)
391 /* round memory size to the next even number. */
392 if (meminfo->size % 2)
395 mem_size = meminfo->size;
/*
 * NOTE(review): the slot scans elsewhere in this file cover indices
 * 0..NUM_MEM_CTX-1, so the table already looks full when num_mem_ctx equals
 * NUM_MEM_CTX. Confirm whether ">=" was intended here.
 */
396 if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
397 XEN_ERR("Memory data space is full in Dom0 driver\n");
/* Names must be unique among registered contexts. */
400 idx = dom0_find_memdata(meminfo->name);
402 XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
/* The sum of all allocations may not exceed the sysfs-configured limit. */
406 if ((dom0_dev.allocated_memsize + mem_size) >
407 dom0_dev.config_memsize) {
408 XEN_ERR("total memory size can't be larger than config memory size.\n");
/*
 * Body of the module initialisation routine (dom0_init is the name passed
 * to module_init() at the end of the file; its signature line is not part
 * of this excerpt). Registers the misc device, creates the "dom0-mm"
 * kobject under mm_kobj and attaches the sysfs attribute group to it.
 */
421 /* Setup the misc device */
422 dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
423 dom0_dev.miscdev.name = "dom0_mm";
424 dom0_dev.miscdev.fops = &data_fops;
426 /* register misc char device */
427 if (misc_register(&dom0_dev.miscdev) != 0) {
428 XEN_ERR("Misc device registration failed\n");
/*
 * NOTE(review): the mutex is initialised after misc_register(), i.e. after
 * the device may already be visible - confirm this ordering is safe.
 */
432 mutex_init(&dom0_dev.data_lock);
433 dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
/* kobject creation failed: undo the misc device registration. */
436 XEN_ERR("dom0-mm object creation failed\n");
437 misc_deregister(&dom0_dev.miscdev);
/*
 * NOTE(review): sysfs_remove_group() is called in the branch where
 * sysfs_create_group() reported failure - confirm removing a group that
 * was not created is intended.
 */
441 if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
442 sysfs_remove_group(dom0_kobj, &dev_attr_grp);
443 kobject_put(dom0_kobj);
444 misc_deregister(&dom0_dev.miscdev);
448 XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
/*
 * Body of the module exit routine (dom0_exit is the name passed to
 * module_exit() at the end of the file; its signature line is not part of
 * this excerpt). Tears down, in order, the sysfs group, the kobject and the
 * misc device that the init code set up.
 */
455 sysfs_remove_group(dom0_kobj, &dev_attr_grp);
456 kobject_put(dom0_kobj);
457 misc_deregister(&dom0_dev.miscdev);
459 XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
/*
 * open() handler for /dev/dom0_mm: clears file->private_data so the file
 * starts with no memory context attached, then logs the open. inode is not
 * used in the visible lines.
 */
463 dom0_open(struct inode *inode, struct file *file)
/* A context is attached later by the PREPARE_MEMSEG or ATTACH_TO_MEMSEG ioctl. */
465 file->private_data = NULL;
467 XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
/*
 * release() handler for /dev/dom0_mm: drops this file's reference on its
 * memory context and frees the context once the count reaches zero.
 * The handling of a file with no context attached is outside this excerpt.
 */
472 dom0_release(struct inode *inode, struct file *file)
475 struct dom0_mm_data *mm_data = file->private_data;
/* refcnt and the global tables are only touched under data_lock. */
480 mutex_lock(&dom0_dev.data_lock);
481 if (--mm_data->refcnt == 0)
482 ret = dom0_memory_free(mm_data);
483 mutex_unlock(&dom0_dev.data_lock);
/* Detach the context from the file. */
485 file->private_data = NULL;
486 XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
/*
 * mmap() handler for /dev/dom0_mm: maps one memory segment of the file's
 * context into the calling process.
 *
 * The mapping's page offset (vm->vm_pgoff) is interpreted as the segment
 * index, not as a byte offset. The requested length may not exceed the size
 * recorded for that segment.
 */
491 dom0_mmap(struct file *file, struct vm_area_struct *vm)
494 uint32_t idx = vm->vm_pgoff;
495 uint64_t pfn, size = vm->vm_end - vm->vm_start;
496 struct dom0_mm_data *mm_data = file->private_data;
/* Segment metadata is read under data_lock. */
501 mutex_lock(&dom0_dev.data_lock);
/* Reject a segment index beyond the number of segments built. */
502 if (idx >= mm_data->num_memseg) {
503 mutex_unlock(&dom0_dev.data_lock);
/* Reject a mapping longer than the segment. */
507 if (size > mm_data->seg_info[idx].size){
508 mutex_unlock(&dom0_dev.data_lock);
512 XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
514 pfn = mm_data->seg_info[idx].pfn;
515 mutex_unlock(&dom0_dev.data_lock);
/* Map size bytes starting at the segment's first pfn; the lock is already dropped here. */
517 status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
/*
 * ioctl handler for /dev/dom0_mm. Dispatches on _IOC_NR(ioctl_num):
 *  - RTE_DOM0_IOCTL_PREPARE_MEMSEG:   create a new memory context from the
 *    struct memory_info at ioctl_param and attach it to this file.
 *  - RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG: attach this file to an existing
 *    context looked up by the name at ioctl_param.
 *  - RTE_DOM0_IOCTL_GET_NUM_MEMSEG:   copy the context's segment count to
 *    user space.
 *  - RTE_DOM0_IOCTL_GET_MEMSEG_INFO:  copy the context's seg_info[] entries
 *    to user space.
 *
 * The error return values are on lines outside this excerpt.
 */
522 dom0_ioctl(struct file *file,
523 unsigned int ioctl_num,
524 unsigned long ioctl_param)
527 char name[DOM0_NAME_MAX] = {0};
528 struct memory_info meminfo;
/* NULL until a PREPARE/ATTACH ioctl succeeds on this file (dom0_open clears it). */
529 struct dom0_mm_data *mm_data = file->private_data;
531 XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
534 * Switch according to the ioctl called
536 switch _IOC_NR(ioctl_num) {
537 case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
538 ret = copy_from_user(&meminfo, (void *)ioctl_param,
539 sizeof(struct memory_info));
/* One context per file descriptor: refuse if one is already attached. */
543 if (mm_data != NULL) {
544 XEN_ERR("Cannot create memory segment for the same"
545 " file descriptor\n");
549 /* Allocate private data */
550 mm_data = vmalloc(sizeof(struct dom0_mm_data));
552 XEN_ERR("Unable to allocate device private data\n");
555 memset(mm_data, 0, sizeof(struct dom0_mm_data));
/* Validation and allocation both run under data_lock. */
557 mutex_lock(&dom0_dev.data_lock);
558 /* check if we can allocate memory*/
/*
 * NOTE(review): confirm the vmalloc'ed mm_data is released on this failure
 * path; no release is visible in this excerpt.
 */
559 if (dom0_check_memory(&meminfo) < 0) {
560 mutex_unlock(&dom0_dev.data_lock);
565 /* allocate memories and created memory segments*/
566 if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
567 XEN_ERR("create memory segment fail.\n");
568 mutex_unlock(&dom0_dev.data_lock);
/* Success: the file now owns the new context. */
572 file->private_data = mm_data;
573 mutex_unlock(&dom0_dev.data_lock);
576 /* support multiple process in term of memory mapping*/
577 case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
/*
 * Copies a fixed DOM0_NAME_MAX bytes from user space.
 * NOTE(review): no explicit termination of name is visible after the copy;
 * the lookup compares at most DOM0_NAME_MAX bytes.
 */
578 ret = copy_from_user(name, (void *)ioctl_param,
579 sizeof(char) * DOM0_NAME_MAX);
583 mutex_lock(&dom0_dev.data_lock);
584 idx = dom0_find_memdata(name);
586 mutex_unlock(&dom0_dev.data_lock);
/* Share the existing context with this file. */
590 mm_data = dom0_dev.mm_data[idx];
592 file->private_data = mm_data;
593 mutex_unlock(&dom0_dev.data_lock);
596 case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
/*
 * NOTE(review): mm_data is dereferenced here, and no NULL check is visible
 * before it. A file that never completed PREPARE/ATTACH has private_data
 * NULL - confirm this cannot be reached in that state.
 */
597 ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
603 case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
/*
 * NOTE(review): the same concern applies here - mm_data is dereferenced with
 * no NULL check visible. The copy length is num_memseg entries of
 * struct memseg_info.
 */
604 ret = copy_to_user((void *)ioctl_param,
605 &mm_data->seg_info[0],
606 sizeof(struct memseg_info) *
607 mm_data->num_memseg);
612 XEN_PRINT("IOCTL default \n");
/* Register dom0_init and dom0_exit as the module's load and unload entry points. */
619 module_init(dom0_init);
620 module_exit(dom0_exit);