4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <rte_eal_memconfig.h>
40 #include <rte_memory.h>
41 #include <rte_ivshmem.h>
42 #include <rte_string_fns.h>
43 #include <rte_common.h>
45 #include <rte_debug.h>
46 #include <rte_spinlock.h>
47 #include <rte_common.h>
48 #include <rte_malloc.h>
50 #include "rte_ivshmem.h"
/* printf-style format of the metadata file path; %s is the metadata name. */
52 #define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
/* Leading part of the generated QEMU option: a size value (suffixed with
 * 'M') followed by the string of shared-file descriptors. */
53 #define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
/* One ":<path>:0x<offset>:0x<length>" element appended per shared file. */
54 #define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
/* Size of the scratch buffer in which the command line is assembled. */
55 #define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
/* Capacity of the page list built by build_config() and of each
 * configuration's memseg cache. */
56 #define IVSHMEM_MAX_PAGES (1 << 12)
/* True when y starts at the physical address where x ends. */
57 #define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
/* sizeof(struct rte_ivshmem_metadata) rounded up to a multiple of pagesz.
 * Expects a variable named pagesz to be visible wherever it is expanded. */
58 #define METADATA_SIZE_ALIGNED \
59 (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
/* Parses a hexadecimal number from `in` into `addr` with strtoull() and logs
 * `err` when errno is set or the character following the number is not `dlm`.
 * NOTE(review): part of this macro body is not visible here; `end` is
 * presumably declared (and errno cleared) inside the macro -- confirm. */
61 #define GET_PAGEMAP_ADDR(in,addr,dlm,err) \
65 addr = strtoull((in), &end, 16); \
66 if (errno != 0 || *end != (dlm)) { \
67 RTE_LOG(ERR, EAL, err); \
/* Cached description of one shared file region. Besides the path below, code
 * in this file also reads and writes `offset` and `len` members of this
 * struct (their declarations are not visible in this excerpt). */
75 struct memseg_cache_entry {
/* path of the backing file, as found in /proc/self/maps */
76 char filepath[PATH_MAX];
/* Per-metadata-file state: the mapped metadata plus the derived file cache.
 * Other code in this file also uses `lock` and `sl` members of this struct
 * (their declarations are not visible in this excerpt). */
81 struct ivshmem_config {
/* mapped metadata file; NULL marks this slot as unused */
82 struct rte_ivshmem_metadata * metadata;
83 struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
84 /**< account for multiple files per segment case */
/* Table of all metadata configurations known to this process; a slot is free
 * while its `metadata` pointer is NULL. */
89 static struct ivshmem_config
90 ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
/* Taken by rte_ivshmem_metadata_create() while it claims a free slot in
 * ivshmem_global_config. */
92 static rte_spinlock_t global_cfg_sl;
/*
 * Looks up the configuration whose metadata name equals `name`.
 *
 * Walks ivshmem_global_config and compares `name` against each slot's
 * metadata name over at most IVSHMEM_NAME_LEN characters. Returns a pointer
 * to the matching slot; callers treat a NULL result as "not found".
 */
94 static struct ivshmem_config *
95 get_config_by_name(const char * name)
97 struct rte_ivshmem_metadata * config;
100 for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
101 config = ivshmem_global_config[i].metadata;
104 if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
105 return &ivshmem_global_config[i];
/*
 * Tests whether the address ranges [addr_64, addr_64 + len) of two memzones
 * intersect: either range's start falling inside the other range counts as
 * an overlap. The caller uses the result as a boolean.
 */
112 overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
114 uint64_t start1, end1, start2, end2;
/* both ranges are half-open: the end address itself is excluded */
116 start1 = s1->addr_64;
117 end1 = s1->addr_64 + s1->len;
118 start2 = s2->addr_64;
119 end2 = s2->addr_64 + s2->len;
/* s1 starts inside s2 */
121 if (start1 >= start2 && start1 < end2)
/* s2 starts inside s1 */
123 if (start2 >= start1 && start2 < end1)
/*
 * Finds the memzone whose start address equals `addr`.
 *
 * Scans the memzone table of the EAL memory configuration. Both the ring and
 * the mempool helpers in this file use it, passing the object pointer itself
 * as `addr`; they treat the result as "not found" when no zone matches.
 */
129 static struct rte_memzone *
130 get_memzone_by_addr(const void * addr)
132 struct rte_memzone * tmp, * mz;
133 struct rte_mem_config * mcfg;
136 mcfg = rte_eal_get_configuration()->mem_config;
139 /* find the memzone that starts exactly at addr */
140 for (i = 0; i < RTE_MAX_MEMZONE; i++) {
141 tmp = &mcfg->memzone[i];
143 if (tmp->addr_64 == (uint64_t) addr) {
/*
 * qsort() comparison callback for struct rte_ivshmem_metadata_entry.
 *
 * Entries without an address (unallocated zones) are moved to the end; the
 * remaining entries are ordered by ascending physical address.
 *
 * The final comparison yields a negative, zero or positive value as qsort()
 * requires. Returning the bare result of `>` (only 0 or 1) would report
 * "equal" whenever the first entry is smaller, which makes the ordering
 * inconsistent and the sort result unspecified.
 */
153 entry_compare(const void * a, const void * b)
155 const struct rte_ivshmem_metadata_entry * e1 =
156 (const struct rte_ivshmem_metadata_entry*) a;
157 const struct rte_ivshmem_metadata_entry * e2 =
158 (const struct rte_ivshmem_metadata_entry*) b;
160 /* move unallocated zones to the end */
161 if (e1->mz.addr == NULL && e2->mz.addr == NULL)
163 if (e1->mz.addr == 0)
165 if (e2->mz.addr == 0)
/* three-way compare: -1 if e1 < e2, 0 if equal, 1 if e1 > e2 */
168 return (e1->mz.phys_addr > e2->mz.phys_addr) - (e1->mz.phys_addr < e2->mz.phys_addr);
/*
 * Resolves the file that backs virtual address `virt_addr` by scanning
 * /proc/self/maps, and stores the result in `e`:
 *   - e->filepath receives the path found on the matching maps line,
 *   - e->offset receives virt_addr minus the start address of that mapping.
 * e->filepath is cleared first. Callers treat a negative return value as
 * failure.
 */
171 /* fills hugepage cache entry for a given start virt_addr */
173 get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
175 uint64_t start_addr, end_addr;
176 char *start,*path_end;
177 char buf[PATH_MAX*2];
184 memset(e->filepath, 0, sizeof(e->filepath));
186 /* open /proc/self/maps */
187 f = fopen("/proc/self/maps", "r");
189 RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
/* each maps line begins with "<start>-<end> " in hexadecimal */
194 while (fgets(buf, sizeof(buf), f) != NULL) {
196 /* get endptr to end of start addr */
199 GET_PAGEMAP_ADDR(start,start_addr,'-',
200 "Cannot find start address in maps!\n");
202 /* if start address is bigger than our address, skip */
203 if (start_addr > virt_addr)
206 GET_PAGEMAP_ADDR(start,end_addr,' ',
207 "Cannot find end address in maps!\n");
209 /* if end address is less than our address, skip */
210 if (end_addr <= virt_addr)
213 /* find where the path starts */
214 start = strstr(start, "/");
219 /* at this point, we know that this is our map.
220 * now let's find the file */
221 path_end = strstr(start, "\n");
225 if (path_end == NULL) {
226 RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
/* NOTE(review): the size passed below is derived from the path length in buf
 * (which is PATH_MAX*2 bytes), not from sizeof(e->filepath) (PATH_MAX bytes);
 * confirm that a maps line can never carry a path longer than the
 * destination. */
230 /* calculate offset and copy the file path */
231 rte_snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);
233 e->offset = virt_addr - start_addr;
/*
 * Rebuilds the derived data of the configuration that owns `metadata`:
 * the per-memzone offsets stored in the metadata entries and the cache of
 * backing files (memseg_cache).
 *
 * All work is done on heap-allocated local copies of both arrays; the live
 * configuration is overwritten only at the very end, once every pass has
 * succeeded. Callers treat a negative return value as failure.
 */
244 * This is a complex function. What it does is the following:
245 * 1. Goes through metadata and gets list of hugepages involved
246 * 2. Sorts the hugepages by size (1G first)
247 * 3. Goes through metadata again and writes correct offsets
248 * 4. Goes through pages and finds out their filenames, offsets etc.
251 build_config(struct rte_ivshmem_metadata * metadata)
253 struct rte_ivshmem_metadata_entry * e_local;
254 struct memseg_cache_entry * ms_local;
255 struct rte_memseg pages[IVSHMEM_MAX_PAGES];
256 struct rte_ivshmem_metadata_entry *entry;
257 struct memseg_cache_entry * c_entry, * prev_entry;
258 struct ivshmem_config * config;
259 unsigned i, j, mz_iter, ms_iter;
260 uint64_t biggest_len;
263 /* return error if we try to use an unknown config file */
264 config = get_config_by_name(metadata->name);
265 if (config == NULL) {
266 RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
270 memset(pages, 0, sizeof(pages));
272 e_local = malloc(sizeof(config->metadata->entry));
275 ms_local = malloc(sizeof(config->memseg_cache));
276 if (ms_local == NULL)
280 /* make local copies before doing anything */
281 memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
282 memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
/* order entries by physical address, unallocated ones last */
284 qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
287 /* first pass - collect all huge pages */
288 for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
290 entry = &e_local[mz_iter];
/* widen the memzone to whole hugepages: round its start down and its
 * (offset-adjusted) length up to the hugepage size */
292 uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
293 entry->mz.hugepage_sz);
294 uint64_t offset = entry->mz.addr_64 - start_addr;
295 uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
296 entry->mz.hugepage_sz);
298 if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
303 /* find first unused page - mz are phys_addr sorted so we don't have to
304 * look out for holes */
305 for (i = 0; i < RTE_DIM(pages); i++) {
307 /* skip if we already have this page */
308 if (pages[i].addr_64 == start_addr) {
309 start_addr += entry->mz.hugepage_sz;
310 len -= entry->mz.hugepage_sz;
313 /* we found a new page */
314 else if (pages[i].addr_64 == 0) {
319 if (i == RTE_DIM(pages)) {
320 RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
324 /* populate however many pages the memzone has */
325 for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
327 pages[i].addr_64 = start_addr;
328 pages[i].len = entry->mz.hugepage_sz;
329 start_addr += entry->mz.hugepage_sz;
330 len -= entry->mz.hugepage_sz;
332 /* if there's still length left */
334 RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
339 /* second pass - sort pages by size */
340 for (i = 0; i < RTE_DIM(pages); i++) {
342 if (pages[i].addr == NULL)
349 * browse all entries starting at 'i', and find the
350 * entry with the biggest length
352 for (j=i; j< RTE_DIM(pages); j++) {
353 if (pages[j].addr == NULL)
355 if (biggest_len == 0 ||
356 pages[j].len > biggest_len) {
357 biggest_len = pages[j].len;
362 /* should not happen */
363 if (biggest_idx == -1) {
364 RTE_LOG(ERR, EAL, "Error sorting by size!\n");
367 if (i != (unsigned) biggest_idx) {
368 struct rte_memseg tmp;
370 memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));
372 /* we don't want to break contiguousness, so instead of just
373 * swapping segments, we move all the preceding segments to the
374 * right and then put the old segment @ biggest_idx in place of
/* j is unsigned, so the loop counts down on the destination index and stops
 * with "j > i"; a "j >= i" test on the source index can never become false
 * when i is 0 and would walk off the start of the array */
376 for (j = (unsigned) biggest_idx; j > i; j--) {
377 memcpy(&pages[j], &pages[j-1], sizeof(struct rte_memseg));
378 memset(&pages[j-1], 0, sizeof(struct rte_memseg));
381 /* put old biggest segment to its new place */
382 memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
386 /* third pass - write correct offsets */
387 for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
391 entry = &e_local[mz_iter];
393 if (entry->mz.addr_64 == 0)
396 /* find page for current memzone */
397 for (i = 0; i < RTE_DIM(pages); i++) {
398 /* we found our page */
399 if (entry->mz.addr_64 >= pages[i].addr_64 &&
400 entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
401 entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
405 offset += pages[i].len;
407 if (i == RTE_DIM(pages)) {
408 RTE_LOG(ERR, EAL, "Page not found!\n");
416 /* fourth pass - create proper memseg cache */
/* NOTE(review): the "<=" bound lets ms_iter reach the array size so that the
 * check inside the loop can report the overflow; pages[] and memseg_cache[]
 * both have IVSHMEM_MAX_PAGES elements */
417 for (i = 0; i < RTE_DIM(pages) &&
418 ms_iter <= RTE_DIM(config->memseg_cache); i++) {
419 if (pages[i].addr_64 == 0)
423 if (ms_iter == RTE_DIM(pages)) {
424 RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
428 c_entry = &ms_local[ms_iter];
429 c_entry->len = pages[i].len;
431 if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
434 /* if previous entry has the same filename and is contiguous,
435 * clear current entry and increase previous entry's length
437 if (prev_entry != NULL &&
438 strncmp(c_entry->filepath, prev_entry->filepath,
439 sizeof(c_entry->filepath)) == 0 &&
440 prev_entry->offset + prev_entry->len == c_entry->offset) {
441 prev_entry->len += pages[i].len;
442 memset(c_entry, 0, sizeof(struct memseg_cache_entry));
445 prev_entry = c_entry;
450 /* update current configuration with new valid data */
451 memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
452 memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
/*
 * Registers memzone `mz` in `config`.
 *
 * With config->sl held, walks the metadata entries in order: a populated
 * entry that overlaps `mz` is an error; the first entry whose address is
 * zero is treated as free, receives a copy of `mz`, and build_config() is
 * run to refresh the derived offsets and file cache. Reaching the end of
 * the table without finding a free entry is reported as "no space left".
 * The spinlock is released on both exit paths visible at the bottom.
 */
467 add_memzone_to_metadata(const struct rte_memzone * mz,
468 struct ivshmem_config * config)
470 struct rte_ivshmem_metadata_entry * entry;
473 rte_spinlock_lock(&config->sl);
475 /* find free slot in this config */
476 for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
477 entry = &config->metadata->entry[i];
/* only populated entries can overlap: test the stored address itself, not
 * the address of the field, which can never be null */
479 if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
480 RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
484 /* if addr is zero, the memzone is probably free */
485 if (entry->mz.addr_64 == 0) {
486 RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
487 mz->name, mz->addr, config->metadata->name);
488 memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
490 /* run config file parser */
491 if (build_config(config->metadata) < 0)
498 /* if we reached the maximum, that means we have no place in config */
499 if (i == RTE_DIM(config->metadata->entry)) {
500 RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
501 config->metadata->name);
505 rte_spinlock_unlock(&config->sl);
508 rte_spinlock_unlock(&config->sl);
/*
 * Adds ring `r` to `config` by locating the memzone that starts at the
 * ring's own address and registering that memzone. Logs an error when no
 * such memzone is found; otherwise returns whatever
 * add_memzone_to_metadata() returns.
 */
513 add_ring_to_metadata(const struct rte_ring * r,
514 struct ivshmem_config * config)
516 struct rte_memzone * mz;
518 mz = get_memzone_by_addr(r);
521 RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
525 return add_memzone_to_metadata(mz, config);
/*
 * Adds mempool `mp` to `config`: first the memzone that starts at the
 * mempool's own address, then the mempool's ring (mp->ring). Logs an error
 * when the mempool's memzone cannot be found. The final result is that of
 * add_ring_to_metadata().
 */
529 add_mempool_to_metadata(const struct rte_mempool * mp,
530 struct ivshmem_config * config)
532 struct rte_memzone * mz;
535 mz = get_memzone_by_addr(mp);
539 RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
543 /* mempool consists of memzone and ring */
544 ret = add_memzone_to_metadata(mz, config);
548 return add_ring_to_metadata(mp->ring, config);
/*
 * Public entry point: adds ring `r` to the metadata file called `name`.
 * Both arguments are checked for NULL, the configuration is looked up by
 * name (an unknown name is logged as an error), and the work is delegated to
 * add_ring_to_metadata(), whose result is returned.
 */
552 rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
554 struct ivshmem_config * config;
556 if (name == NULL || r == NULL)
559 config = get_config_by_name(name);
561 if (config == NULL) {
562 RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
566 return add_ring_to_metadata(r, config);
/*
 * Public entry point: adds memzone `mz` to the metadata file called `name`.
 * Both arguments are checked for NULL, the configuration is looked up by
 * name (an unknown name is logged as an error), and the work is delegated to
 * add_memzone_to_metadata(), whose result is returned.
 */
570 rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
572 struct ivshmem_config * config;
574 if (name == NULL || mz == NULL)
577 config = get_config_by_name(name);
579 if (config == NULL) {
580 RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
584 return add_memzone_to_metadata(mz, config);
/*
 * Public entry point: adds mempool `mp` to the metadata file called `name`.
 * Both arguments are checked for NULL, the configuration is looked up by
 * name (an unknown name is logged as an error), and the work is delegated to
 * add_mempool_to_metadata(), whose result is returned.
 */
588 rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
590 struct ivshmem_config * config;
592 if (name == NULL || mp == NULL)
595 config = get_config_by_name(name);
597 if (config == NULL) {
598 RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
602 return add_mempool_to_metadata(mp, config);
/*
 * Writes the metadata file path for `name` into `buffer` (at most `bufflen`
 * bytes), using IVSHMEM_CONFIG_FILE_FMT.
 */
606 ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
608 rte_snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
/*
 * Creates and maps the metadata file for `name`.
 *
 * Opens the file (creating it with mode 0660 if needed), takes the fcntl()
 * record lock described by `lock`, resizes the file and maps it shared,
 * readable and writable. The incoming `size` is replaced by
 * METADATA_SIZE_ALIGNED before it is used. Every failure is logged; the
 * caller treats a NULL result as failure. On success the mapped address is
 * returned.
 */
614 void *ivshmem_metadata_create(const char *name, size_t size,
619 char pathname[PATH_MAX];
621 ivshmem_config_path(pathname, sizeof(pathname), name);
623 fd = open(pathname, O_RDWR | O_CREAT, 0660);
625 RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
629 size = METADATA_SIZE_ALIGNED;
/* non-blocking lock attempt: fails at once if another process holds it */
631 retval = fcntl(fd, F_SETLK, lock);
634 RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
635 "process using it?\n", pathname);
639 retval = ftruncate(fd, size);
642 RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
646 metadata_addr = mmap(NULL, size,
647 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
649 if (metadata_addr == MAP_FAILED){
650 RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
/* F_UNLCK is a lock type, not an fcntl() command: releasing a record lock
 * means issuing F_SETLK with l_type set to F_UNLCK. A zero l_start/l_len
 * from the start of the file covers the whole file. */
652 /* we don't care if we can't unlock */
653 fcntl(fd, F_SETLK, &(struct flock){ .l_type = F_UNLCK, .l_whence = SEEK_SET });
659 return metadata_addr;
/*
 * Public entry point: creates a new metadata file called `name`.
 *
 * With global_cfg_sl held, claims the first slot of ivshmem_global_config
 * whose `metadata` pointer is NULL (logging an error when every slot is in
 * use), describes a write lock over the first METADATA_SIZE_ALIGNED bytes of
 * the file, and lets ivshmem_metadata_create() create and map the file. The
 * mapped metadata is then zeroed and stamped with IVSHMEM_MAGIC and the
 * given name. The global spinlock is released on every visible exit path.
 */
662 int rte_ivshmem_metadata_create(const char *name)
664 struct ivshmem_config * ivshmem_config;
/* page size is needed by METADATA_SIZE_ALIGNED */
668 pagesz = getpagesize();
673 rte_spinlock_lock(&global_cfg_sl);
/* look for an unused slot */
675 for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
676 if (ivshmem_global_config[index].metadata == NULL) {
677 ivshmem_config = &ivshmem_global_config[index];
682 if (index == RTE_DIM(ivshmem_global_config)) {
683 RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
684 "Maximum has been reached\n");
685 rte_spinlock_unlock(&global_cfg_sl);
/* write lock covering the page-aligned metadata area from offset 0 */
689 ivshmem_config->lock.l_type = F_WRLCK;
690 ivshmem_config->lock.l_whence = SEEK_SET;
692 ivshmem_config->lock.l_start = 0;
693 ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;
695 ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *)
696 ivshmem_metadata_create(
698 sizeof(struct rte_ivshmem_metadata),
699 &ivshmem_config->lock));
701 if (ivshmem_global_config[index].metadata == NULL) {
702 rte_spinlock_unlock(&global_cfg_sl);
/* start from a clean metadata block, then stamp magic number and name */
707 memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
708 ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
709 rte_snprintf(ivshmem_config->metadata->name,
710 sizeof(ivshmem_config->metadata->name), "%s", name);
712 rte_spinlock_unlock(&global_cfg_sl);
/*
 * Public entry point: writes into `buffer` (capacity `size`) the QEMU
 * command-line option that shares every file of the metadata called `name`.
 *
 * With config->sl held, the option is assembled in a local scratch buffer:
 * one ":<path>:0x<offset>:0x<len>" element per cached file, then a padding
 * element that brings the total size up to a power of two, then the metadata
 * file itself. The result is finally formatted into `buffer` behind the
 * IVSHMEM_QEMU_CMD_LINE_HEADER_FMT header. Errors (unknown name, scratch
 * buffer exhausted, caller's buffer too short) are logged and the spinlock is
 * released before leaving.
 *
 * NOTE(review): the loop below is bounded by the number of metadata entries
 * although it indexes memseg_cache -- confirm that this is the intended
 * limit.
 */
718 rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
720 const struct memseg_cache_entry * ms_cache, *entry;
721 struct ivshmem_config * config;
722 char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
723 char cfg_file_path[PATH_MAX];
724 unsigned remaining_len, tmplen, iter;
725 uint64_t shared_mem_size, zero_size, total_size;
727 if (buffer == NULL || name == NULL)
730 config = get_config_by_name(name);
732 if (config == NULL) {
733 RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
737 rte_spinlock_lock(&config->sl);
739 /* prepare metadata file path */
740 rte_snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
741 config->metadata->name);
743 ms_cache = config->memseg_cache;
745 cmdline_ptr = cmdline;
746 remaining_len = sizeof(cmdline);
/* check the index bound first, and only then read the cache element */
751 while ((iter < RTE_DIM(config->metadata->entry)) && (ms_cache[iter].len != 0)) {
753 entry = &ms_cache[iter];
755 /* Offset and sizes within the current pathname */
756 tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
757 entry->filepath, entry->offset, entry->len);
759 shared_mem_size += entry->len;
761 cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
/* clamp instead of letting the unsigned subtraction wrap when the formatted
 * length reaches or exceeds the space left (truncated output) */
762 remaining_len = (tmplen < remaining_len) ? remaining_len - tmplen : 0;
764 if (remaining_len == 0) {
765 RTE_LOG(ERR, EAL, "Command line too long!\n");
766 rte_spinlock_unlock(&config->sl);
/* pad the shared area so that data + metadata add up to a power of two */
773 total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
774 zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
776 /* add /dev/zero to command-line to fill the space */
777 tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
782 cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
783 remaining_len = (tmplen < remaining_len) ? remaining_len - tmplen : 0;
785 if (remaining_len == 0) {
786 RTE_LOG(ERR, EAL, "Command line too long!\n");
787 rte_spinlock_unlock(&config->sl);
791 /* add metadata file to the end of command-line */
792 tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
795 METADATA_SIZE_ALIGNED);
797 cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
798 remaining_len = (tmplen < remaining_len) ? remaining_len - tmplen : 0;
800 if (remaining_len == 0) {
801 RTE_LOG(ERR, EAL, "Command line too long!\n");
802 rte_spinlock_unlock(&config->sl);
806 /* if current length of the command line is bigger than the buffer supplied
807 * by the user, or if command-line is bigger than what IVSHMEM accepts */
808 if ((sizeof(cmdline) - remaining_len) > size) {
809 RTE_LOG(ERR, EAL, "Buffer is too short!\n");
810 rte_spinlock_unlock(&config->sl);
813 /* complete the command-line */
814 rte_snprintf(buffer, size,
815 IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
819 rte_spinlock_unlock(&config->sl);
/*
 * Public entry point: prints the entries of the metadata called `name` to
 * standard output.
 *
 * With config->sl held, walks the metadata entries from the first one until
 * an entry without an address or the end of the table is reached, printing
 * one line per entry. When RTE_LIBRTE_IVSHMEM_DEBUG is defined, the backing
 * file of every hugepage covered by the entry is resolved with
 * get_hugefile_by_virt_addr() and printed as well. An unknown name is logged
 * as an error.
 */
826 rte_ivshmem_metadata_dump(const char *name)
829 struct ivshmem_config * config;
830 struct rte_ivshmem_metadata_entry *entry;
831 #ifdef RTE_LIBRTE_IVSHMEM_DEBUG
833 uint64_t end, hugepage_sz;
834 struct memseg_cache_entry e;
840 /* return error if we try to use an unknown config file */
841 config = get_config_by_name(name);
842 if (config == NULL) {
843 RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
847 rte_spinlock_lock(&config->sl);
849 entry = &config->metadata->entry[0];
/* test the index before touching the entry: once i reaches the table size,
 * entry points one element past the array and must not be dereferenced */
851 while (i < RTE_DIM(config->metadata->entry) && entry->mz.addr != NULL) {
853 printf("Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
854 "virt:%-15p, off:0x%-15lx\n",
863 #ifdef RTE_LIBRTE_IVSHMEM_DEBUG
864 printf("\tHugepage files:\n");
/* iterate over every hugepage touched by the memzone */
866 hugepage_sz = entry->mz.hugepage_sz;
867 addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
868 end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
871 for (; addr < end; addr += hugepage_sz) {
872 memset(&e, 0, sizeof(e));
874 get_hugefile_by_virt_addr(addr, &e);
876 printf("\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
877 addr, addr + hugepage_sz, e.offset, e.filepath);
883 rte_spinlock_unlock(&config->sl);