/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_ivshmem.h>
#include <rte_string_fns.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>

#include "rte_ivshmem.h"
#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
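
/*
 * Together, the two format strings above produce a single QEMU argument of
 * roughly this shape (file names, offsets and sizes are placeholders, not
 * values produced by this library):
 *
 *   -device ivshmem,size=<MB>M,shm=fd:<hugefile>:0x<offset>:0x<len>[:<file>:0x<offset>:0x<len>...]
 *
 * See rte_ivshmem_metadata_cmdline_generate() below for how it is assembled.
 */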
#define IVSHMEM_MAX_PAGES (1 << 12)
#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
#define METADATA_SIZE_ALIGNED \
	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
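
/*
 * The metadata region is padded to a whole page: ivshmem_metadata_create()
 * below ftruncates and mmaps exactly METADATA_SIZE_ALIGNED bytes, and the
 * same length is used when the metadata file is appended to the QEMU
 * command line.
 */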
#define GET_PAGEMAP_ADDR(in,addr,dlm,err) {    \
	char *end;                             \
	errno = 0;                             \
	addr = strtoull((in), &end, 16);       \
	if (errno != 0 || *end != (dlm)) {     \
		RTE_LOG(ERR, EAL, err);        \
		goto error;                    \
	}                                      \
	(in) = end + 1;                        \
}
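
/*
 * GET_PAGEMAP_ADDR parses the two hexadecimal addresses at the start of a
 * /proc/self/maps line. A typical line looks roughly like this (the values
 * and the hugetlbfs path are examples only):
 *
 *   7f2a40000000-7f2a40200000 rw-s 00000000 00:23 12345  /mnt/huge/rtemap_0
 *
 * The first invocation consumes the address before the '-' delimiter, the
 * second one the address before the following space.
 */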
struct memseg_cache_entry {
	char filepath[PATH_MAX];
	uint64_t offset;
	uint64_t len;
};
struct ivshmem_config {
	struct rte_ivshmem_metadata * metadata;
	struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
	/**< account for multiple files per segment case */
	struct flock lock;
	rte_spinlock_t sl;
};
static struct ivshmem_config
ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];

static rte_spinlock_t global_cfg_sl;

/* page size used by METADATA_SIZE_ALIGNED, set in rte_ivshmem_metadata_create() */
static int pagesz;
static struct ivshmem_config *
get_config_by_name(const char * name)
{
	struct rte_ivshmem_metadata * config;
	unsigned i;

	for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
		config = ivshmem_global_config[i].metadata;
		/* configs are allocated in order, so a NULL slot means we're done */
		if (config == NULL)
			return NULL;
		if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
			return &ivshmem_global_config[i];
	}

	return NULL;
}
static int
overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
{
	uint64_t start1, end1, start2, end2;

	start1 = s1->addr_64;
	end1 = s1->addr_64 + s1->len;
	start2 = s2->addr_64;
	end2 = s2->addr_64 + s2->len;

	/* the zones overlap if either one starts inside the other */
	if (start1 >= start2 && start1 < end2)
		return 1;
	if (start2 >= start1 && start2 < end1)
		return 1;

	return 0;
}
static struct rte_memzone *
get_memzone_by_addr(const void * addr)
{
	struct rte_memzone * tmp, * mz;
	struct rte_mem_config * mcfg;
	int i;

	mcfg = rte_eal_get_configuration()->mem_config;
	mz = NULL;

	/* find memzone for the ring */
	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
		tmp = &mcfg->memzone[i];

		if (tmp->addr_64 == (uint64_t) addr) {
			mz = tmp;
			break;
		}
	}

	return mz;
}
static int
entry_compare(const void * a, const void * b)
{
	const struct rte_ivshmem_metadata_entry * e1 =
			(const struct rte_ivshmem_metadata_entry*) a;
	const struct rte_ivshmem_metadata_entry * e2 =
			(const struct rte_ivshmem_metadata_entry*) b;

	/* move unallocated zones to the end */
	if (e1->mz.addr == NULL && e2->mz.addr == NULL)
		return 0;
	if (e1->mz.addr == NULL)
		return 1;
	if (e2->mz.addr == NULL)
		return -1;

	/* sort allocated zones by physical address */
	return e1->mz.phys_addr > e2->mz.phys_addr;
}
/* fills hugepage cache entry for a given start virt_addr */
static int
get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
{
	uint64_t start_addr, end_addr;
	char *start, *path_end;
	char buf[PATH_MAX*2];
	FILE *f;

	path_end = NULL;
	memset(e->filepath, 0, sizeof(e->filepath));

	/* open /proc/self/maps */
	f = fopen("/proc/self/maps", "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
		return -1;
	}

	while (fgets(buf, sizeof(buf), f) != NULL) {

		/* get endptr to end of start addr */
		start = buf;

		GET_PAGEMAP_ADDR(start,start_addr,'-',
				"Cannot find start address in maps!\n");

		/* if start address is bigger than our address, skip */
		if (start_addr > virt_addr)
			continue;

		GET_PAGEMAP_ADDR(start,end_addr,' ',
				"Cannot find end address in maps!\n");

		/* if end address is less than our address, skip */
		if (end_addr <= virt_addr)
			continue;

		/* find where the path starts */
		start = strstr(start, "/");
		if (start == NULL)
			continue;

		/* at this point, we know that this is our map.
		 * now let's find the file */
		path_end = strstr(start, "\n");
		break;
	}

	if (path_end == NULL) {
		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
		goto error;
	}

	/* calculate offset and copy the file path */
	snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);

	e->offset = virt_addr - start_addr;

	fclose(f);
	return 0;

error:
	fclose(f);
	return -1;
}
/*
 * This is a complex function. What it does is the following:
 *  1. Goes through metadata and gets list of hugepages involved
 *  2. Sorts the hugepages by size (1G first)
 *  3. Goes through metadata again and writes correct offsets
 *  4. Goes through pages and finds out their filenames, offsets etc.
 */
static int
build_config(struct rte_ivshmem_metadata * metadata)
{
	struct rte_ivshmem_metadata_entry * e_local;
	struct memseg_cache_entry * ms_local;
	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
	struct rte_ivshmem_metadata_entry *entry;
	struct memseg_cache_entry * c_entry, * prev_entry;
	struct ivshmem_config * config;
	unsigned i, j, mz_iter, ms_iter;
	uint64_t biggest_len;
	int biggest_idx;

	/* return error if we try to use an unknown config file */
	config = get_config_by_name(metadata->name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
		return -1;
	}

	memset(pages, 0, sizeof(pages));

	e_local = malloc(sizeof(config->metadata->entry));
	if (e_local == NULL)
		return -1;
	ms_local = malloc(sizeof(config->memseg_cache));
	if (ms_local == NULL) {
		free(e_local);
		return -1;
	}

	/* make local copies before doing anything */
	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
	memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));

	qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
			entry_compare);

	/* first pass - collect all huge pages */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {

		entry = &e_local[mz_iter];

		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
				entry->mz.hugepage_sz);
		uint64_t offset = entry->mz.addr_64 - start_addr;
		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
				entry->mz.hugepage_sz);

		/* entries are sorted, so stop at the first unallocated one */
		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
			break;

		unsigned start_page = 0;

		/* find first unused page - mz are phys_addr sorted so we don't have to
		 * look out for holes */
		for (i = 0; i < RTE_DIM(pages); i++) {

			/* skip if we already have this page */
			if (pages[i].addr_64 == start_addr) {
				start_addr += entry->mz.hugepage_sz;
				len -= entry->mz.hugepage_sz;
				continue;
			}
			/* we found a new page */
			else if (pages[i].addr_64 == 0) {
				start_page = i;
				break;
			}
		}
		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
			goto fail;
		}

		/* populate however many pages the memzone has */
		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {

			pages[i].addr_64 = start_addr;
			pages[i].len = entry->mz.hugepage_sz;
			start_addr += entry->mz.hugepage_sz;
			len -= entry->mz.hugepage_sz;
		}
		/* if there's still length left */
		if (len != 0) {
			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
			goto fail;
		}
	}

	/* second pass - sort pages by size */
	for (i = 0; i < RTE_DIM(pages); i++) {

		if (pages[i].addr == NULL)
			break;

		biggest_len = 0;
		biggest_idx = -1;

		/*
		 * browse all entries starting at 'i', and find the
		 * entry with the biggest length
		 */
		for (j = i; j < RTE_DIM(pages); j++) {

			if (pages[j].addr == NULL)
				break;

			if (biggest_len == 0 ||
				pages[j].len > biggest_len) {
				biggest_len = pages[j].len;
				biggest_idx = j;
			}
		}

		/* should not happen */
		if (biggest_idx == -1) {
			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
			goto fail;
		}
		if (i != (unsigned) biggest_idx) {
			struct rte_memseg tmp;

			memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));

			/* we don't want to break contiguousness, so instead of just
			 * swapping segments, we move all the preceding segments to the
			 * right and then put the old segment @ biggest_idx in place of
			 * segment @ i */
			for (j = biggest_idx - 1; j >= i; j--) {
				memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg));
				memset(&pages[j], 0, sizeof(struct rte_memseg));
				/* j is unsigned - avoid wrap-around when i == 0 */
				if (j == 0)
					break;
			}

			/* put old biggest segment to its new place */
			memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
		}
	}

	/* third pass - write correct offsets */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {

		uint64_t offset = 0;

		entry = &e_local[mz_iter];

		if (entry->mz.addr_64 == 0)
			break;

		/* find page for current memzone */
		for (i = 0; i < RTE_DIM(pages); i++) {
			/* we found our page */
			if (entry->mz.addr_64 >= pages[i].addr_64 &&
					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
						offset;
				break;
			}
			offset += pages[i].len;
		}
		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Page not found!\n");
			goto fail;
		}
	}

	ms_iter = 0;
	prev_entry = NULL;

	/* fourth pass - create proper memseg cache */
	for (i = 0; i < RTE_DIM(pages) &&
			ms_iter <= RTE_DIM(config->memseg_cache); i++) {
		if (pages[i].addr_64 == 0)
			break;

		if (ms_iter == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
			goto fail;
		}

		c_entry = &ms_local[ms_iter];
		c_entry->len = pages[i].len;

		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
			goto fail;

		/* if previous entry has the same filename and is contiguous,
		 * clear current entry and increase previous entry's length
		 */
		if (prev_entry != NULL &&
				strncmp(c_entry->filepath, prev_entry->filepath,
				sizeof(c_entry->filepath)) == 0 &&
				prev_entry->offset + prev_entry->len == c_entry->offset) {
			prev_entry->len += pages[i].len;
			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
		}
		else {
			prev_entry = c_entry;
			ms_iter++;
		}
	}

	/* update current configuration with new valid data */
	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));

	free(ms_local);
	free(e_local);
	return 0;

fail:
	free(ms_local);
	free(e_local);
	return -1;
}
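
/*
 * Illustrative result (hypothetical addresses and file names): if two
 * memzones share the same 2MB hugepage backed by /mnt/huge/rtemap_0, the
 * first pass records that page only once, and the fourth pass merges it
 * with a contiguous neighbour from the same file into one cache entry,
 * e.g. { filepath = "/mnt/huge/rtemap_0", offset = 0x0, len = 0x400000 }.
 * Each metadata entry's 'offset' then points into this flattened layout of
 * concatenated pages, which is what the guest sees as a single region.
 */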
static int
add_memzone_to_metadata(const struct rte_memzone * mz,
		struct ivshmem_config * config)
{
	struct rte_ivshmem_metadata_entry * entry;
	unsigned i;

	rte_spinlock_lock(&config->sl);

	/* find free slot in this config */
	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
		entry = &config->metadata->entry[i];

		if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
			RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
			goto fail;
		}

		/* if addr is zero, the memzone is probably free */
		if (entry->mz.addr_64 == 0) {
			RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
					mz->name, mz->addr, config->metadata->name);
			memcpy(&entry->mz, mz, sizeof(struct rte_memzone));

			/* run config file parser */
			if (build_config(config->metadata) < 0)
				goto fail;

			break;
		}
	}

	/* if we reached the maximum, that means we have no place in config */
	if (i == RTE_DIM(config->metadata->entry)) {
		RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
				config->metadata->name);
		goto fail;
	}

	rte_spinlock_unlock(&config->sl);
	return 0;

fail:
	rte_spinlock_unlock(&config->sl);
	return -1;
}
static int
add_ring_to_metadata(const struct rte_ring * r,
		struct ivshmem_config * config)
{
	struct rte_memzone * mz;

	mz = get_memzone_by_addr(r);
	if (mz == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
		return -1;
	}
	return add_memzone_to_metadata(mz, config);
}
static int
add_mempool_to_metadata(const struct rte_mempool * mp,
		struct ivshmem_config * config)
{
	struct rte_memzone * mz;
	int ret;

	mz = get_memzone_by_addr(mp);
	if (mz == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
		return -1;
	}

	/* mempool consists of memzone and ring */
	ret = add_memzone_to_metadata(mz, config);
	if (ret < 0)
		return -1;

	return add_ring_to_metadata(mp->ring, config);
}
int
rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
{
	struct ivshmem_config * config;

	if (name == NULL || r == NULL)
		return -1;

	config = get_config_by_name(name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
		return -1;
	}

	return add_ring_to_metadata(r, config);
}
int
rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
{
	struct ivshmem_config * config;

	if (name == NULL || mz == NULL)
		return -1;

	config = get_config_by_name(name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
		return -1;
	}

	return add_memzone_to_metadata(mz, config);
}
int
rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
{
	struct ivshmem_config * config;

	if (name == NULL || mp == NULL)
		return -1;

	config = get_config_by_name(name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
		return -1;
	}

	return add_mempool_to_metadata(mp, config);
}
static inline void
ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
{
	snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
}
static void *
ivshmem_metadata_create(const char *name, size_t size, struct flock *lock)
{
	int retval, fd;
	void *metadata_addr;
	char pathname[PATH_MAX];

	ivshmem_config_path(pathname, sizeof(pathname), name);

	fd = open(pathname, O_RDWR | O_CREAT, 0660);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
		return NULL;
	}

	size = METADATA_SIZE_ALIGNED;

	retval = fcntl(fd, F_SETLK, lock);
	if (retval < 0) {
		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
				"process using it?\n", pathname);
		close(fd);
		return NULL;
	}

	retval = ftruncate(fd, size);
	if (retval < 0) {
		RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
		close(fd);
		return NULL;
	}

	metadata_addr = mmap(NULL, size,
			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (metadata_addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
		/* we don't care if we can't unlock */
		fcntl(fd, F_UNLCK, lock);
		close(fd);
		return NULL;
	}

	return metadata_addr;
}
int rte_ivshmem_metadata_create(const char *name)
{
	struct ivshmem_config * ivshmem_config;
	unsigned index;

	if (name == NULL)
		return -1;

	pagesz = getpagesize();

	rte_spinlock_lock(&global_cfg_sl);

	for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
		if (ivshmem_global_config[index].metadata == NULL) {
			ivshmem_config = &ivshmem_global_config[index];
			break;
		}
	}

	if (index == RTE_DIM(ivshmem_global_config)) {
		RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
				"Maximum has been reached\n");
		rte_spinlock_unlock(&global_cfg_sl);
		return -1;
	}

	ivshmem_config->lock.l_type = F_WRLCK;
	ivshmem_config->lock.l_whence = SEEK_SET;
	ivshmem_config->lock.l_start = 0;
	ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;

	ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *)
			ivshmem_metadata_create(
					name,
					sizeof(struct rte_ivshmem_metadata),
					&ivshmem_config->lock));

	if (ivshmem_global_config[index].metadata == NULL) {
		rte_spinlock_unlock(&global_cfg_sl);
		return -1;
	}

	/* metadata setup */
	memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
	ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
	snprintf(ivshmem_config->metadata->name,
			sizeof(ivshmem_config->metadata->name), "%s", name);

	rte_spinlock_unlock(&global_cfg_sl);

	return 0;
}
int
rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
{
	const struct memseg_cache_entry * ms_cache, *entry;
	struct ivshmem_config * config;
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
	char cfg_file_path[PATH_MAX];
	unsigned remaining_len, tmplen, iter;
	uint64_t shared_mem_size, zero_size, total_size;

	if (buffer == NULL || name == NULL)
		return -1;

	config = get_config_by_name(name);

	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
		return -1;
	}

	rte_spinlock_lock(&config->sl);

	/* prepare metadata file path */
	snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
			config->metadata->name);

	ms_cache = config->memseg_cache;

	cmdline_ptr = cmdline;
	remaining_len = sizeof(cmdline);

	shared_mem_size = 0;
	iter = 0;

	while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) {

		entry = &ms_cache[iter];

		/* Offset and sizes within the current pathname */
		tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
				entry->filepath, entry->offset, entry->len);

		shared_mem_size += entry->len;

		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
		remaining_len -= tmplen;

		if (remaining_len == 0) {
			RTE_LOG(ERR, EAL, "Command line too long!\n");
			rte_spinlock_unlock(&config->sl);
			return -1;
		}

		iter++;
	}

	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;

	/* add /dev/zero to command-line to fill the space */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			"/dev/zero",
			(uint64_t)0x0,
			zero_size);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* add metadata file to the end of command-line */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			cfg_file_path,
			(uint64_t)0x0,
			METADATA_SIZE_ALIGNED);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* if current length of the command line is bigger than the buffer supplied
	 * by the user, or if command-line is bigger than what IVSHMEM accepts */
	if ((sizeof(cmdline) - remaining_len) > size) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* complete the command-line */
	snprintf(buffer, size,
			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
			total_size >> 20,
			cmdline);

	rte_spinlock_unlock(&config->sl);

	return 0;
}
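
/*
 * Minimal usage sketch for the API above (illustrative only and excluded
 * from the build; the metadata name "ivshmem_md", the ring parameters and
 * rte_ring_create() from rte_ring.h are example choices, not requirements
 * of this library).
 */
#if 0
static int
example_export_ring_to_guest(void)
{
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE];
	struct rte_ring *r;

	/* create a metadata file, then a ring, and register the ring in it */
	if (rte_ivshmem_metadata_create("ivshmem_md") < 0)
		return -1;

	r = rte_ring_create("ivshmem_ring", 256, SOCKET_ID_ANY, 0);
	if (r == NULL || rte_ivshmem_metadata_add_ring(r, "ivshmem_md") < 0)
		return -1;

	/* produce the "-device ivshmem,..." argument to pass to QEMU */
	return rte_ivshmem_metadata_cmdline_generate(cmdline, sizeof(cmdline),
			"ivshmem_md");
}
#endif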
void
rte_ivshmem_metadata_dump(FILE *f, const char *name)
{
	unsigned i = 0;
	struct ivshmem_config * config;
	struct rte_ivshmem_metadata_entry *entry;
#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
	uint64_t addr;
	uint64_t end, hugepage_sz;
	struct memseg_cache_entry e;
#endif

	if (name == NULL)
		return;

	/* return error if we try to use an unknown config file */
	config = get_config_by_name(name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
		return;
	}

	rte_spinlock_lock(&config->sl);

	entry = &config->metadata->entry[0];

	while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) {

		fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
				"virt:%-15p, off:0x%-15lx\n",
				i,
				entry->mz.name,
				entry->mz.phys_addr,
				entry->mz.len,
				entry->mz.addr,
				entry->offset);

#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
		fprintf(f, "\tHugepage files:\n");

		hugepage_sz = entry->mz.hugepage_sz;
		addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
		end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
				hugepage_sz);

		for (; addr < end; addr += hugepage_sz) {
			memset(&e, 0, sizeof(e));

			get_hugefile_by_virt_addr(addr, &e);

			fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
					addr, addr + hugepage_sz, e.offset, e.filepath);
		}
#endif
		entry++;
		i++;
	}

	rte_spinlock_unlock(&config->sl);
}