/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017-2018 Intel Corporation
 */

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_tailq.h>

#include "eal_filesystem.h"
#include "eal_private.h"

#include "rte_fbarray.h"
#define MASK_SHIFT 6ULL
#define MASK_ALIGN (1ULL << MASK_SHIFT)
#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR((x), MASK_ALIGN))
#define MASK_GET_IDX(idx, mod) (((idx) << MASK_SHIFT) + (mod))
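/*
 * Worked example (illustrative): with MASK_ALIGN == 64, element index 70
 * lives in mask word MASK_LEN_TO_IDX(70) == 1, at bit position
 * MASK_LEN_TO_MOD(70) == 6; MASK_GET_IDX(1, 6) == 70 reverses the mapping.
 */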
/*
 * This is a mask that is always stored at the end of array, to provide fast
 * way of finding free/used spots without looping through each element.
 */
struct used_mask {
	int n_masks;
	uint64_t data[];
};
static size_t
calc_mask_size(int len)
{
	/* mask must be multiple of MASK_ALIGN, even though length of array
	 * itself may not be aligned on that boundary.
	 */
	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
	return sizeof(struct used_mask) +
			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
}
static size_t
calc_data_size(size_t page_sz, int elt_sz, int len)
{
	size_t data_sz = (size_t)elt_sz * len;
	size_t msk_sz = calc_mask_size(len);
	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
}
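/*
 * Worked example (illustrative): len = 1000, elt_sz = 8, page_sz = 4096.
 * calc_mask_size() rounds len up to 1024 bits, i.e. 16 64-bit mask words.
 * Data takes 8000 bytes, the mask header plus 128 bytes of mask words
 * follow, and the total is rounded up to a page multiple: 8192 bytes.
 */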
static struct used_mask *
get_used_mask(void *data, int elt_sz, int len)
{
	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
}
static int
resize_and_map(int fd, void *addr, size_t len)
{
	void *map_addr;

	if (ftruncate(fd, len)) {
		RTE_LOG(ERR, EAL, "Cannot truncate file: %s\n",
				strerror(errno));
		/* pass errno up the chain */
		rte_errno = errno;
		return -1;
	}

	/* with MAP_FIXED, a successful mmap() must return addr */
	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, 0);
	if (map_addr != addr) {
		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
		/* pass errno up the chain */
		rte_errno = errno;
		return -1;
	}
	return 0;
}
static int
find_next_n(const struct rte_fbarray *arr, int start, int n, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	int msk_idx, lookahead_idx, first, first_mod;
	int last, last_mod;
	uint64_t last_msk, ignore_msk;
	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1);
	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);
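	/*
	 * Worked example (illustrative): start = 70 gives first = 1 and
	 * first_mod = 6, so ignore_msk clears bits 0-5 of the first mask word
	 * scanned. If arr->len = 200, then last = 3 and last_mod = 8, so
	 * last_msk keeps only bits 0-7 of the final mask word; bits past the
	 * end of the array can never match.
	 */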
	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
		uint64_t cur_msk, lookahead_msk;
		int run_start, clz, left;
		int s_idx, need;
		/*
		 * The process of getting n consecutive bits for arbitrary n is
		 * a bit involved, but here it is in a nutshell:
		 *
		 *  1. let n be the number of consecutive bits we're looking for
		 *  2. check if n can fit in one mask, and if so, do n-1
		 *     rshift-ands to see if there is an appropriate run inside
		 *     our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 *  3. invert the mask and count leading zeroes (that is, count
		 *     how many consecutive set bits we had starting from the
		 *     end of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 *  4. to satisfy our requirements, next mask must have n-k
		 *     consecutive set bits right at the start, so we will do
		 *     (n-k-1) rshift-ands and check if first bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for (see the worked example below).
		 */
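		/*
		 * Worked example (illustrative): searching for n = 3 set bits
		 * in cur_msk = 0b01110010. After two rshift-ands,
		 * tmp_msk = 0b01110010 & 0b00111001 & 0b00011100 = 0b00010000:
		 * bit i survives iff bits i, i+1 and i+2 were all set, so
		 * ctz(tmp_msk) = 4 is the start of a 3-bit run.
		 */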
		cur_msk = msk->data[msk_idx];
		left = n;

		/* if we're looking for free spaces, invert the mask */
		if (!used)
			cur_msk = ~cur_msk;

		/* combine current ignore mask with last index ignore mask */
		if (msk_idx == last)
			ignore_msk |= last_msk;

		/* if we have an ignore mask, ignore once */
		if (ignore_msk) {
			cur_msk &= ignore_msk;
			ignore_msk = 0;
		}

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;

			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk >> 1ULL;

			/* we found what we were looking for */
			if (tmp_msk != 0) {
				run_start = __builtin_ctzll(tmp_msk);
				return MASK_GET_IDX(msk_idx, run_start);
			}
		}
		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count leading zeroes on inverted mask, guarding against
		 * __builtin_clzll(0), which is undefined
		 */
		clz = (~cur_msk == 0) ? MASK_ALIGN : __builtin_clzll(~cur_msk);

		/* if there aren't any runs at the end either, just continue */
		if (clz == 0)
			continue;

		/* we have a partial run at the end, so try looking ahead */
		run_start = MASK_ALIGN - clz;
		left -= clz;
		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
				lookahead_idx++) {
			lookahead_msk = msk->data[lookahead_idx];

			/* if we're looking for free space, invert the mask */
			if (!used)
				lookahead_msk = ~lookahead_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookahead_msk &= lookahead_msk >> 1ULL;
			/* if first bit is not set, we've lost the run */
			if ((lookahead_msk & 1) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookahead-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = ~((1ULL << need) - 1);
				msk_idx = lookahead_idx;
				break;
			}

			left -= need;

			/* check if we've found what we were looking for */
			if (left == 0)
				break;
		}

		/* we didn't find anything, so continue */
		if (left > 0)
			continue;

		return MASK_GET_IDX(msk_idx, run_start);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
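/*
 * Worked example for the lookahead path (illustrative): suppose n = 10 and
 * the current mask ends with 4 set bits, so clz on the inverted mask is 4,
 * run_start = 60 and left = 6. The next mask must then start with
 * need = 6 set bits: five rshift-ands reduce it so that bit 0 survives only
 * if bits 0-5 were all set, completing the run found at
 * MASK_GET_IDX(msk_idx, 60).
 */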
static int
find_next(const struct rte_fbarray *arr, int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	int idx, first, first_mod;
	int last, last_mod;
	uint64_t last_msk, ignore_msk;
	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1ULL);
	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);
	for (idx = first; idx < msk->n_masks; idx++) {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* if this is last mask, ignore everything after last bit */
		if (idx == last)
			cur &= last_msk;

		/* ignore everything before start on first iteration */
		if (idx == first)
			cur &= ignore_msk;

		/* check if we have any entries */
		if (cur == 0)
			continue;

		/*
		 * find first set bit - that will correspond to whatever it is
		 * that we're looking for.
		 */
		found = __builtin_ctzll(cur);
		return MASK_GET_IDX(idx, found);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
static int
find_contig(const struct rte_fbarray *arr, int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	int idx, first, first_mod;
	int last, last_mod;
	uint64_t last_msk;
	int need_len, result = 0;

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
		uint64_t cur = msk->data[idx];
		int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* if this is last mask, ignore everything after last bit */
		if (idx == last) {
			cur &= last_msk;
			need_len = last_mod;
		}

		/* ignore everything before start on first iteration */
		if (idx == first) {
			cur >>= first_mod;
			/* at the start, we don't need the full mask len */
			need_len -= first_mod;
		}

		/* we will be looking for zeroes, so invert the mask */
		cur = ~cur;

		/* if mask is zero, we have a complete run */
		if (cur == 0)
			continue;

		/* see if current run ends before mask end */
		run_len = __builtin_ctzll(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
	}
	return result;
}
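/*
 * Worked example (illustrative): find_contig() with used = true, start = 2
 * and first mask word 0x3c (bits 2-5 set). On the first iteration the word
 * is shifted right by first_mod = 2, giving 0xf; inverting that yields a
 * value with ctz = 4, so run_len = 4 < need_len and the function returns 4:
 * the run of used entries at indices 2-5.
 */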
static int
set_used(struct rte_fbarray *arr, int idx, bool used)
{
	struct used_mask *msk;
	uint64_t msk_bit;
	int msk_idx;
	bool already_used;
	int ret = 0;

	if (arr == NULL || idx < 0 || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
	msk_idx = MASK_LEN_TO_IDX(idx);

	/* prevent array from changing under us */
	rte_rwlock_write_lock(&arr->rwlock);

	already_used = (msk->data[msk_idx] & msk_bit) != 0;

	/* nothing to be done */
	if (used == already_used)
		goto out;

	if (used) {
		msk->data[msk_idx] |= msk_bit;
		arr->count++;
	} else {
		msk->data[msk_idx] &= ~msk_bit;
		arr->count--;
	}
out:
	rte_rwlock_write_unlock(&arr->rwlock);

	return ret;
}
static int
fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
{
	if (name == NULL || elt_sz == 0 || len == 0) {
		rte_errno = EINVAL;
		return -1;
	}

	if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	return 0;
}
int __rte_experimental
rte_fbarray_init(struct rte_fbarray *arr, const char *name, int len, int elt_sz)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	struct used_mask *msk;
	void *data = NULL;
	int fd = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	if (fully_validate(name, elt_sz, len))
		return -1;

	page_sz = sysconf(_SC_PAGESIZE);

	/* calculate our memory limits */
	mmap_len = calc_data_size(page_sz, elt_sz, len);

	data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		return -1;

	eal_get_fbarray_path(path, sizeof(path), name);

	/*
	 * Each fbarray is unique to process namespace, i.e. the filename
	 * depends on process prefix. Try to take out a lock and see if we
	 * succeed. If we don't, someone else is using it already.
	 */
	fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
				path, strerror(errno));
		rte_errno = errno;
		goto fail;
	} else if (flock(fd, LOCK_EX | LOCK_NB)) {
		RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
				path, strerror(errno));
		rte_errno = EBUSY;
		goto fail;
	}

	/* take out a non-exclusive lock, so that other processes could still
	 * attach to it, but no other process could reinitialize it.
	 */
	if (flock(fd, LOCK_SH | LOCK_NB)) {
		rte_errno = errno;
		goto fail;
	}

	if (resize_and_map(fd, data, mmap_len))
		goto fail;

	/* we've mmap'ed the file, we can now close the fd */
	close(fd);

	/* initialize the data */
	memset(data, 0, mmap_len);

	/* populate data structure */
	snprintf(arr->name, sizeof(arr->name), "%s", name);
	arr->data = data;
	arr->len = len;
	arr->elt_sz = elt_sz;
	arr->count = 0;

	msk = get_used_mask(data, elt_sz, len);
	msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));

	rte_rwlock_init(&arr->rwlock);

	return 0;
fail:
	if (data)
		munmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	return -1;
}
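/*
 * Usage sketch (illustrative; error handling mostly omitted):
 *
 *	struct rte_fbarray arr;
 *	int idx;
 *	void *elt;
 *
 *	if (rte_fbarray_init(&arr, "example", 128, sizeof(uint64_t)) < 0)
 *		return -1;
 *	idx = rte_fbarray_find_next_free(&arr, 0);
 *	elt = rte_fbarray_get(&arr, idx);
 *	rte_fbarray_set_used(&arr, idx);
 *	...
 *	rte_fbarray_destroy(&arr);
 */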
int __rte_experimental
rte_fbarray_attach(struct rte_fbarray *arr)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	void *data = NULL;
	int fd = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize attach as two values we need (element
	 * size and array length) are constant for the duration of life of
	 * the array, so the parts we care about will not race.
	 */

	if (fully_validate(arr->name, arr->elt_sz, arr->len))
		return -1;

	page_sz = sysconf(_SC_PAGESIZE);

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		goto fail;

	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = open(path, O_RDWR);
	if (fd < 0) {
		rte_errno = errno;
		goto fail;
	}

	/* lock the file, to let others know we're using it */
	if (flock(fd, LOCK_SH | LOCK_NB)) {
		rte_errno = errno;
		goto fail;
	}

	if (resize_and_map(fd, data, mmap_len))
		goto fail;

	close(fd);

	return 0;
fail:
	if (data)
		munmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	return -1;
}
int __rte_experimental
rte_fbarray_detach(struct rte_fbarray *arr)
{
	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize detach as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * detaching while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = sysconf(_SC_PAGESIZE);

	/* this may already be unmapped (e.g. repeated call from previously
	 * failed destroy()), but this is on user, we can't (easily) know if
	 * this is still mapped.
	 */
	munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));

	return 0;
}
int __rte_experimental
rte_fbarray_destroy(struct rte_fbarray *arr)
{
	char path[PATH_MAX];
	int fd, ret;

	ret = rte_fbarray_detach(arr);
	if (ret)
		return ret;

	/* try deleting the file */
	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		rte_errno = errno;
		return -1;
	}
	if (flock(fd, LOCK_EX | LOCK_NB)) {
		RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
		rte_errno = EBUSY;
		ret = -1;
	} else {
		ret = 0;
		unlink(path);
		memset(arr, 0, sizeof(*arr));
	}
	close(fd);

	return ret;
}
void * __rte_experimental
rte_fbarray_get(const struct rte_fbarray *arr, int idx)
{
	void *ret = NULL;

	if (arr == NULL || idx < 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (idx >= arr->len) {
		rte_errno = EINVAL;
		return NULL;
	}

	ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);

	return ret;
}
int __rte_experimental
rte_fbarray_set_used(struct rte_fbarray *arr, int idx)
{
	return set_used(arr, idx, true);
}

int __rte_experimental
rte_fbarray_set_free(struct rte_fbarray *arr, int idx)
{
	return set_used(arr, idx, false);
}
int __rte_experimental
rte_fbarray_is_used(struct rte_fbarray *arr, int idx)
{
	struct used_mask *msk;
	int msk_idx;
	uint64_t msk_bit;
	int ret;

	if (arr == NULL || idx < 0 || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	msk_idx = MASK_LEN_TO_IDX(idx);
	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);

	ret = (msk->data[msk_idx] & msk_bit) != 0;

	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_next_free(struct rte_fbarray *arr, int start)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	if (arr->len == arr->count) {
		rte_errno = ENOSPC;
		ret = -1;
		goto out;
	}

	ret = find_next(arr, start, false);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_next_used(struct rte_fbarray *arr, int start)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	if (arr->count == 0) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}

	ret = find_next(arr, start, true);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, int start, int n)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len ||
			n < 0 || n > arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	if (arr->len == arr->count || arr->len - arr->count < n) {
		rte_errno = ENOSPC;
		ret = -1;
		goto out;
	}

	ret = find_next_n(arr, start, n, false);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
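/*
 * Usage sketch (illustrative): reserving a contiguous run of n slots.
 *
 *	int i, idx = rte_fbarray_find_next_n_free(&arr, 0, n);
 *
 *	if (idx < 0)
 *		return -1; // rte_errno is ENOSPC if no such run exists
 *	for (i = 0; i < n; i++)
 *		rte_fbarray_set_used(&arr, idx + i);
 */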
int __rte_experimental
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, int start, int n)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len ||
			n < 0 || n > arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	if (arr->count < n) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}

	ret = find_next_n(arr, start, n, true);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_contig_free(struct rte_fbarray *arr, int start)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	if (arr->len == arr->count) {
		rte_errno = ENOSPC;
		ret = -1;
		goto out;
	}

	if (arr->count == 0) {
		ret = arr->len - start;
		goto out;
	}

	ret = find_contig(arr, start, false);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_contig_used(struct rte_fbarray *arr, int start)
{
	int ret;

	if (arr == NULL || start < 0 || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	ret = find_contig(arr, start, true);

	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
{
	void *end;
	int ret = -1;

	/*
	 * no need to synchronize as it doesn't matter if underlying data
	 * changes - we're doing pointer arithmetic here.
	 */

	if (arr == NULL || elt == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
	if (elt < arr->data || elt >= end) {
		rte_errno = EINVAL;
		return -1;
	}

	ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;

	return ret;
}
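/*
 * Worked example (illustrative): with arr->data == 0x1000 and
 * arr->elt_sz == 64, an element pointer of 0x1100 yields
 * RTE_PTR_DIFF(0x1100, 0x1000) / 64 == 256 / 64 == 4, i.e. index 4. A
 * pointer into the middle of an element maps to that element's index,
 * since the integer division truncates.
 */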
void __rte_experimental
rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
{
	struct used_mask *msk;
	int i;

	if (arr == NULL || f == NULL) {
		rte_errno = EINVAL;
		return;
	}

	if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
		fprintf(f, "Invalid file-backed array\n");
		return;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	fprintf(f, "File-backed array: %s\n", arr->name);
	fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
			arr->len, arr->count, arr->elt_sz);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);

	for (i = 0; i < msk->n_masks; i++)
		fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);

	rte_rwlock_read_unlock(&arr->rwlock);
}