4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include <sys/types.h>
45 #include <sys/queue.h>
50 #include <sys/ioctl.h>
54 #include <rte_memory.h>
55 #include <rte_memzone.h>
56 #include <rte_launch.h>
58 #include <rte_eal_memconfig.h>
59 #include <rte_per_lcore.h>
60 #include <rte_lcore.h>
61 #include <rte_common.h>
62 #include <rte_string_fns.h>
64 #include "eal_private.h"
65 #include "eal_internal_cfg.h"
66 #include "eal_filesystem.h"
67 #include <exec-env/rte_dom0_common.h>
/* Page granularity used by the MFN arithmetic in this file. */
69 #define PAGE_SIZE RTE_PGSIZE_4K
/* Suffix joined to the hugefile prefix ("<prefix>-dom0-mem") to name the Dom0 memory. */
70 #define DEFAUL_DOM0_NAME "dom0-mem"
/* File descriptor obtained by opening DOM0_MM_DEV; starts out as -1. */
72 static int xen_fd = -1;
/* sysfs directory containing the "memsize" attribute read by get_xen_memory_size(). */
73 static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
76 * Try to mmap *size bytes in /dev/zero. If it is successful, return the
77 * pointer to the mmap'd area and keep *size unmodified. Otherwise, retry
78 * with a smaller zone: decrease *size by mem_size until it reaches
79 * 0, in which case NULL is returned. Note: the address returned by this
80 * function is aligned to a multiple of mem_size.
/*
 * Probe for a free virtual address range of *size bytes.
 *
 * /dev/zero is opened read-only and (*size + mem_size) bytes are mapped from
 * it with PROT_READ; the extra mem_size bytes give room to round the start
 * address up to a mem_size boundary. The attempt is repeated while mmap()
 * fails and *size is still non-zero. A successful probe mapping is unmapped
 * again and its start address is rounded up with RTE_ALIGN_CEIL.
 *
 * size:     in/out, length of the requested area in bytes.
 * mem_size: alignment applied to the address; also the slack added to the
 *           probe mapping.
 *
 * NOTE(review): the shrinking of *size between attempts and the return
 * statements are not visible here -- confirm them against the full function.
 */
83 xen_get_virtual_area(size_t *size, size_t mem_size)
/* hex conversion so that the printed value agrees with its "0x" prefix */
89 RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
91 fd = open("/dev/zero", O_RDONLY);
93 RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
97 addr = mmap(NULL, (*size) + mem_size, PROT_READ,
99 if (addr == MAP_FAILED)
101 } while (addr == MAP_FAILED && *size > 0);
/* still failing once the loop has given up: report the error */
103 if (addr == MAP_FAILED) {
105 RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
/* drop the probe mapping; only its address is used from here on */
109 munmap(addr, (*size) + mem_size);
112 /* align addr to a mem_size boundary */
113 aligned_addr = (uintptr_t)addr;
114 aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
115 addr = (void *)(aligned_addr);
117 RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
124 * Get memory size configuration from the
125 * /sys/kernel/mm/dom0-mm/memsize-mB/memsize file; the size unit is MB.
/*
 * Read the Dom0 memory size from the "memsize" attribute under sys_dir_path
 * with eal_parse_sysfs_value() and validate it. Rejected configurations end
 * the process through rte_exit(EXIT_FAILURE, ...); in particular the value
 * may not exceed DOM0_CONFIG_MEMSIZE. The messages below give the unit as MB.
 */
128 get_xen_memory_size(void)
131 unsigned long mem_size = 0;
/* NOTE(review): only ever assigned the literal "memsize" below; static storage looks unnecessary. */
132 static const char *file_name;
/* build "<sys_dir_path>/memsize" */
134 file_name = "memsize";
135 snprintf(path, sizeof(path), "%s/%s",
136 sys_dir_path, file_name);
/* a negative return from the sysfs parser is treated as failure */
138 if (eal_parse_sysfs_value(path, &mem_size) < 0)
/* NOTE(review): judging by the messages, the next two exits reject an unset size and an odd size -- confirm the guarding conditions. */
142 rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
143 " configured.\n",sys_dir_path, file_name);
145 rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
146 " even number.\n",sys_dir_path, file_name);
/* upper bound on the configured size */
148 if (mem_size > DOM0_CONFIG_MEMSIZE)
149 rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
150 " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
156 * Calculate the MFN in Xen Dom0 from the physical address.
/*
 * Translate a physical address into a machine address using the per-2M MFN
 * table stored in the memory segment that owns the address.
 *
 * memseg_id: index of the owning segment in the mem config's memseg array,
 *            or -1 to search all RTE_MAX_MEMSEG entries for the segment
 *            whose range contains phy_addr.
 * phy_addr:  physical address to translate.
 *
 * Returns mfn * PAGE_SIZE plus the offset of phy_addr inside its page.
 * RTE_BAD_PHYS_ADDR is returned on the lookup path (presumably when no
 * segment matches -- the guarding condition is not visible here).
 */
159 rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
162 uint64_t mfn, mfn_offset;
163 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
164 struct rte_memseg *memseg = mcfg->memseg;
166 /* find the memory segment owning the physical address */
167 if (memseg_id == -1) {
168 for (i = 0; i < RTE_MAX_MEMSEG; i++) {
/* both bounds must test the phy_addr parameter */
/* NOTE(review): the upper bound continues on a line not shown here; confirm it adds the segment length. */
169 if ((phy_addr >= memseg[i].phys_addr) &&
170 (phy_addr < memseg[i].phys_addr +
177 return RTE_BAD_PHYS_ADDR;
/* index of the 2M chunk, relative to the start of the segment */
180 mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
182 /* the MFN is contiguous within each 2M chunk */
/* offset inside the chunk in PAGE_SIZE units: (delta % 2M) / PAGE_SIZE, since % and / associate left to right */
183 mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
184 RTE_PGSIZE_2M / PAGE_SIZE;
185 mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
187 /** return machine address */
188 return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
/*
 * Set up the Dom0 memory segments.
 *
 * Reads the configured Dom0 memory size, checks it against the amount
 * requested in internal_config.memory, opens DOM0_MM_DEV and uses its ioctls
 * to prepare the memory, query the number of segments and fetch their
 * description. Each segment is then mmap'ed from the device and recorded in
 * the memseg array of the EAL memory configuration.
 *
 * Exits through rte_exit() when more memory is requested than is configured;
 * the other failures are logged with RTE_LOG(ERR, ...).
 */
192 rte_xen_dom0_memory_init(void)
194 void *vir_addr, *vma_addr = NULL;
196 uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
198 struct memory_info meminfo;
199 struct memseg_info seginfo[RTE_MAX_MEMSEG];
200 int flags, page_size = getpagesize();
201 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
202 struct rte_memseg *memseg = mcfg->memseg;
203 uint64_t total_mem = internal_config.memory;
205 memset(seginfo, 0, sizeof(seginfo));
206 memset(&meminfo, 0, sizeof(struct memory_info));
208 mem_size = get_xen_memory_size();
/* requested amount expressed in MB (divided by 2^20) so it is comparable with mem_size */
209 requested = (unsigned) (total_mem / 0x100000);
210 if (requested > mem_size)
211 /* if we didn't satisfy total memory requirements */
212 rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
213 " available: %uMB\n", requested, mem_size);
/* an explicit request narrows the size; with total_mem == 0 the configured size is kept */
214 else if (total_mem != 0)
215 mem_size = requested;
217 /* Check FD and open once */
219 xen_fd = open(DOM0_MM_DEV, O_RDWR);
221 RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
226 meminfo.size = mem_size;
228 /* construct memory management name for Dom0 */
229 snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
230 internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
232 /* Notify kernel driver to allocate memory */
233 ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
235 RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
240 /* Get number of memory segment from driver */
241 ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
243 RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
/* seginfo[] holds RTE_MAX_MEMSEG entries, so a larger count cannot be handled */
248 if(num_memseg > RTE_MAX_MEMSEG){
/* num_memseg is uint32_t, hence %u */
249 RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %u is greater"
250 " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
255 /* get all memory segments information */
256 ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
258 RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
263 /* map all memory segments to contiguous user space */
264 for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
266 vma_len = seginfo[memseg_idx].size;
269 * get the biggest virtual memory area up to vma_len. If it fails,
270 * vma_addr is NULL, so let the kernel provide the address.
272 vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
273 if (vma_addr == NULL) {
275 vma_len = RTE_PGSIZE_2M;
277 flags = MAP_SHARED | MAP_FIXED;
/* the segment size is overwritten with the length that will actually be mapped */
279 seginfo[memseg_idx].size = vma_len;
/* file offset is the segment index scaled by the page size (presumably how the driver selects the segment -- confirm) */
280 vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
281 PROT_READ|PROT_WRITE, flags, xen_fd,
282 memseg_idx * page_size);
283 if (vir_addr == MAP_FAILED) {
284 RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
/* record the mapping in the EAL memseg table */
290 memseg[memseg_idx].addr = vir_addr;
291 memseg[memseg_idx].phys_addr = page_size *
292 seginfo[memseg_idx].pfn ;
293 memseg[memseg_idx].len = seginfo[memseg_idx].size;
/* one MFN entry per 2M of segment size */
294 for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
295 memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
297 /* MFNs are continuous in 2M, so assume that page size is 2M */
298 memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
300 memseg[memseg_idx].nchannel = mcfg->nchannel;
301 memseg[memseg_idx].nrank = mcfg->nrank;
303 /* NUMA is not supported in Xen Dom0, so only set socket 0 */
304 memseg[memseg_idx].socket_id = 0;
317 * This creates the memory mappings in the secondary process to match those of
318 * the primary process. It goes through each memory segment in the DPDK runtime
319 * configuration, mapping them in order to form a contiguous block in the
320 * virtual memory space.
/*
 * Attach to the Dom0 memory segments and map them into this process.
 *
 * Opens DOM0_MM_DEV, issues RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG with the Dom0
 * memory name ("<hugefile_prefix>-dom0-mem"), then walks the memseg array of
 * the EAL memory configuration and mmaps each segment from the device at the
 * address recorded for it. Failures are logged with RTE_LOG(ERR, ...).
 */
323 rte_xen_dom0_memory_attach(void)
325 const struct rte_mem_config *mcfg;
326 unsigned s = 0; /* s used to track the segment number */
330 char name[DOM0_NAME_MAX] = {0};
331 int page_size = getpagesize();
333 mcfg = rte_eal_get_configuration()->mem_config;
335 /* Check FD and open once */
337 xen_fd = open(DOM0_MM_DEV, O_RDWR);
339 RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
344 /* construct memory management name for Dom0 */
345 snprintf(name, DOM0_NAME_MAX, "%s-%s",
346 internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
347 /* attach to memory segments of primary process */
348 ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
350 RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
354 /* map all segments into memory to make sure we get the addrs */
355 for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
358 * the first memory segment with len==0 is the one that
359 * follows the last valid segment.
361 if (mcfg->memseg[s].len == 0)
/* MAP_FIXED at the recorded address, so the segment is placed exactly where the memseg table says it is */
364 vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
365 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
367 if (vir_addr == MAP_FAILED) {
368 RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
369 "in %s to requested address [%p]\n",
370 (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
371 mcfg->memseg[s].addr);