From: Anatoly Burakov
Date: Tue, 26 Feb 2019 17:13:11 +0000 (+0000)
Subject: fbarray: add internal tailq for mapped areas
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=5b61c62cfd76;p=dpdk.git

fbarray: add internal tailq for mapped areas

Currently, there are several reliability issues with fbarray:

- There is no way to prevent attaching to overlapping memory areas
- There is no way to prevent double-detach
- A failed destroy leaves the fbarray in an invalid state (the fbarray
  itself is valid, but its backing memory area is already detached)

In addition, on FreeBSD, mmap()'ing a file descriptor does not keep the
lock taken on it, so we also need to store the fd in order to keep the
lock held.

This patch addresses all of the above by adding an internal tailq that
tracks each created/attached memory area together with its file
descriptor. Attach now refuses areas that overlap an already tracked
one, detach/destroy fail cleanly when the area is not tracked, and the
fd is kept open until detach/destroy.

Signed-off-by: Anatoly Burakov
---
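The scheme is simple bookkeeping: every successful init/attach records
an (addr, len, fd) triple in a process-local tailq, attach refuses an
area that overlaps a recorded one, and detach/destroy refuse an area
that was never recorded. Below is a minimal standalone sketch of the
same pattern in plain libc; struct mem_area mirrors the patch, but
track_area(), untrack_area(), "areas" and the single-threaded setup
(no spinlock) are illustrative only, not part of the patch's API:

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

/* mirrors the struct added by this patch */
struct mem_area {
	TAILQ_ENTRY(mem_area) next;
	void *addr;
	size_t len;
	int fd;
};
TAILQ_HEAD(mem_area_head, mem_area);
static struct mem_area_head areas = TAILQ_HEAD_INITIALIZER(areas);

/* hypothetical helper: record a mapped area, refusing overlaps */
static int
track_area(void *addr, size_t len, int fd)
{
	struct mem_area *ma;
	char *ns = addr, *ne = ns + len;

	TAILQ_FOREACH(ma, &areas, next) {
		char *s = ma->addr, *e = s + ma->len;

		/* do half-open intervals [ns, ne) and [s, e) intersect? */
		if (ns < e && ne > s)
			return -1; /* attach reports this as EEXIST */
	}
	ma = malloc(sizeof(*ma));
	if (ma == NULL)
		return -1;
	ma->addr = addr;
	ma->len = len;
	ma->fd = fd; /* kept open so the flock() stays held */
	TAILQ_INSERT_TAIL(&areas, ma, next);
	return 0;
}

/* hypothetical helper: forget a recorded area, refusing double-detach */
static int
untrack_area(void *addr, size_t len)
{
	struct mem_area *ma;

	TAILQ_FOREACH(ma, &areas, next)
		if (ma->addr == addr && ma->len == len)
			break;
	if (ma == NULL)
		return -1; /* detach/destroy report this as ENOENT */
	TAILQ_REMOVE(&areas, ma, next);
	free(ma);
	return 0;
}

int
main(void)
{
	static char buf[4096];

	printf("track:   %d\n", track_area(buf, sizeof(buf), -1)); /* 0 */
	printf("overlap: %d\n", track_area(buf + 128, 64, -1));    /* -1 */
	printf("untrack: %d\n", untrack_area(buf, sizeof(buf)));   /* 0 */
	printf("again:   %d\n", untrack_area(buf, sizeof(buf)));   /* -1 */
	return 0;
}

The interval test above is the canonical overlap predicate for
half-open ranges; overlap() in the patch expresses the same condition
as two one-sided checks (does either area's start fall inside the
other?), which also covers the case of one area fully containing the
other.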
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index ea0735cb81..819d097bfa 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -28,6 +28,22 @@
 #define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
 #define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
 
+/*
+ * We use this to keep track of created/attached memory areas to prevent user
+ * errors in API usage.
+ */
+struct mem_area {
+	TAILQ_ENTRY(mem_area) next;
+	void *addr;
+	size_t len;
+	int fd;
+};
+TAILQ_HEAD(mem_area_head, mem_area);
+/* local per-process tailq */
+static struct mem_area_head mem_area_tailq =
+	TAILQ_HEAD_INITIALIZER(mem_area_tailq);
+static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;
+
 /*
  * This is a mask that is always stored at the end of array, to provide fast
  * way of finding free/used spots without looping through each element.
@@ -87,6 +103,22 @@ resize_and_map(int fd, void *addr, size_t len)
 	return 0;
 }
 
+static int
+overlap(const struct mem_area *ma, const void *start, size_t len)
+{
+	const void *end = RTE_PTR_ADD(start, len);
+	const void *ma_start = ma->addr;
+	const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);
+
+	/* does the new area's start fall within the existing area? */
+	if (start >= ma_start && start < ma_end)
+		return 1;
+	/* does the existing area's start fall within the new area? */
+	if (ma_start >= start && ma_start < end)
+		return 1;
+	return 0;
+}
+
 static int
 find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
 		bool used)
@@ -684,6 +716,7 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 	size_t page_sz, mmap_len;
 	char path[PATH_MAX];
 	struct used_mask *msk;
+	struct mem_area *ma = NULL;
 	void *data = NULL;
 	int fd = -1;
 
@@ -695,6 +728,15 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 	if (fully_validate(name, elt_sz, len))
 		return -1;
 
+	/* allocate mem area before doing anything */
+	ma = malloc(sizeof(*ma));
+	if (ma == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
 	page_sz = sysconf(_SC_PAGESIZE);
-	if (page_sz == (size_t)-1)
-		goto fail;
+	if (page_sz == (size_t)-1) {
+		free(ma);
+		return -1;
+	}
@@ -706,10 +748,16 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
-	if (data == NULL)
-		goto fail;
+	if (data == NULL) {
+		free(ma);
+		return -1;
+	}
 
+	rte_spinlock_lock(&mem_area_lock);
+
+	fd = -1;
+
 	if (internal_config.no_shconf) {
 		/* remap virtual area as writable */
 		void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
-				MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+				MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
 		if (new_data == MAP_FAILED) {
 			RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
 					__func__, strerror(errno));
@@ -748,10 +796,13 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 
 		if (resize_and_map(fd, data, mmap_len))
 			goto fail;
-
-		/* we've mmap'ed the file, we can now close the fd */
-		close(fd);
 	}
+	ma->addr = data;
+	ma->len = mmap_len;
+	ma->fd = fd;
+
+	/* do not close fd - keep it until detach/destroy */
+	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
 
 	/* initialize the data */
 	memset(data, 0, mmap_len);
@@ -768,18 +819,24 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 
 	rte_rwlock_init(&arr->rwlock);
 
+	rte_spinlock_unlock(&mem_area_lock);
+
 	return 0;
 fail:
 	if (data)
 		munmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
+	free(ma);
+
+	rte_spinlock_unlock(&mem_area_lock);
 	return -1;
 }
 
 int __rte_experimental
 rte_fbarray_attach(struct rte_fbarray *arr)
 {
+	struct mem_area *ma = NULL, *tmp = NULL;
 	size_t page_sz, mmap_len;
 	char path[PATH_MAX];
 	void *data = NULL;
@@ -799,12 +856,32 @@ rte_fbarray_attach(struct rte_fbarray *arr)
 	if (fully_validate(arr->name, arr->elt_sz, arr->len))
 		return -1;
 
+	ma = malloc(sizeof(*ma));
+	if (ma == NULL) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
 	page_sz = sysconf(_SC_PAGESIZE);
-	if (page_sz == (size_t)-1)
-		goto fail;
+	if (page_sz == (size_t)-1) {
+		free(ma);
+		return -1;
+	}
 
 	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
 
+	/* check the tailq - maybe user has already mapped this address space */
+	rte_spinlock_lock(&mem_area_lock);
+
+	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+		if (overlap(tmp, arr->data, mmap_len)) {
+			rte_errno = EEXIST;
+			goto fail;
+		}
+	}
+
+	/* we know this memory area is unique, so proceed */
+
 	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
 	if (data == NULL)
 		goto fail;
@@ -826,22 +903,34 @@ rte_fbarray_attach(struct rte_fbarray *arr)
 	if (resize_and_map(fd, data, mmap_len))
 		goto fail;
 
-	close(fd);
+	/* store our new memory area */
+	ma->addr = data;
+	ma->fd = fd; /* keep fd until detach/destroy */
+	ma->len = mmap_len;
+
+	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
 
 	/* we're done */
 
+	rte_spinlock_unlock(&mem_area_lock);
 	return 0;
 fail:
 	if (data)
 		munmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
+	free(ma);
+	rte_spinlock_unlock(&mem_area_lock);
 	return -1;
 }
 
 int __rte_experimental
 rte_fbarray_detach(struct rte_fbarray *arr)
 {
+	struct mem_area *tmp = NULL;
+	size_t mmap_len;
+	int ret = -1;
+
 	if (arr == NULL) {
 		rte_errno = EINVAL;
 		return -1;
@@ -860,49 +949,114 @@ rte_fbarray_detach(struct rte_fbarray *arr)
 	if (page_sz == (size_t)-1)
 		return -1;
 
-	/* this may already be unmapped (e.g. repeated call from previously
-	 * failed destroy(), but this is on user, we can't (easily) know if this
-	 * is still mapped.
-	 */
-	munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));
+	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
 
-	return 0;
+	/* does this area exist? */
+	rte_spinlock_lock(&mem_area_lock);
+
+	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+		if (tmp->addr == arr->data && tmp->len == mmap_len)
+			break;
+	}
+	if (tmp == NULL) {
+		rte_errno = ENOENT;
+		ret = -1;
+		goto out;
+	}
+
+	munmap(arr->data, mmap_len);
+
+	/* area is unmapped, close fd and remove the tailq entry */
+	if (tmp->fd >= 0)
+		close(tmp->fd);
+	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+	free(tmp);
+
+	ret = 0;
+out:
+	rte_spinlock_unlock(&mem_area_lock);
+	return ret;
 }
 
 int __rte_experimental
 rte_fbarray_destroy(struct rte_fbarray *arr)
 {
+	struct mem_area *tmp = NULL;
+	size_t mmap_len;
 	int fd, ret;
 	char path[PATH_MAX];
 
-	ret = rte_fbarray_detach(arr);
-	if (ret)
-		return ret;
+	if (arr == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
 
-	/* with no shconf, there were never any files to begin with */
-	if (internal_config.no_shconf)
-		return 0;
+	/*
+	 * we don't need to synchronize detach as the two values we need
+	 * (element size and total capacity) are constant for the lifetime of
+	 * the array, so the parts we care about will not race. if the user is
+	 * detaching while doing something else in the same process, we can't
+	 * really do anything about it, things will blow up either way.
+	 */
 
-	/* try deleting the file */
-	eal_get_fbarray_path(path, sizeof(path), arr->name);
+	size_t page_sz = sysconf(_SC_PAGESIZE);
 
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n",
-			strerror(errno));
+	if (page_sz == (size_t)-1)
 		return -1;
+
+	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+	/* does this area exist? */
+	rte_spinlock_lock(&mem_area_lock);
+
+	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+		if (tmp->addr == arr->data && tmp->len == mmap_len)
+			break;
 	}
-	if (flock(fd, LOCK_EX | LOCK_NB)) {
-		RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
-		rte_errno = EBUSY;
+	if (tmp == NULL) {
+		rte_errno = ENOENT;
 		ret = -1;
-	} else {
-		ret = 0;
-		unlink(path);
-		memset(arr, 0, sizeof(*arr));
+		goto out;
 	}
-	close(fd);
 
+	/* with no shconf, there were never any files to begin with */
+	if (!internal_config.no_shconf) {
+		/*
+		 * attempt to get an exclusive lock on the file, to ensure it
+		 * has been detached by all other processes
+		 */
+		fd = tmp->fd;
+		if (flock(fd, LOCK_EX | LOCK_NB)) {
+			RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+			rte_errno = EBUSY;
+			ret = -1;
+			goto out;
+		}
+
+		/* we're OK to destroy the file */
+		eal_get_fbarray_path(path, sizeof(path), arr->name);
+		if (unlink(path)) {
+			RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n",
+				strerror(errno));
+			rte_errno = errno;
+			/*
+			 * we're still holding an exclusive lock, so drop it to
+			 * shared.
+			 */
+			flock(fd, LOCK_SH | LOCK_NB);
+
+			ret = -1;
+			goto out;
+		}
+		close(fd);
+	}
+	munmap(arr->data, mmap_len);
+	/* area is unmapped, remove the tailq entry */
+	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+	free(tmp);
+	ret = 0;
+out:
+	rte_spinlock_unlock(&mem_area_lock);
 	return ret;
 }
 
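Taken together, the new failure modes look roughly like this from the
API side. The snippet below is hypothetical (it assumes an initialized
EAL and an fbarray already created under the same name, e.g. by a
primary process, so that the first attach can succeed); the rte_errno
values are the ones set by the code above:

#include <stdio.h>
#include <errno.h>

#include <rte_errno.h>
#include <rte_fbarray.h>

static void
attach_detach_example(struct rte_fbarray *arr)
{
	/* first attach maps the area and records it in the tailq */
	if (rte_fbarray_attach(arr) < 0)
		return;

	/* attaching again overlaps the recorded area: EEXIST */
	if (rte_fbarray_attach(arr) < 0 && rte_errno == EEXIST)
		printf("overlapping attach rejected\n");

	/* detach unmaps the area, closes the stored fd, drops the record */
	rte_fbarray_detach(arr);

	/* a second detach finds no record: ENOENT, nothing is unmapped */
	if (rte_fbarray_detach(arr) < 0 && rte_errno == ENOENT)
		printf("double detach rejected\n");
}

Previously, a second detach would have blindly munmap()'d whatever
happened to live at arr->data, and a destroy that failed with EBUSY
had already detached the area, leaving the fbarray unusable; both
paths now fail cleanly with an error code and leave the state intact.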