/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017-2018 Intel Corporation
 */

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include "eal_filesystem.h"
#include "eal_private.h"

#include "rte_fbarray.h"
#define MASK_SHIFT 6ULL
#define MASK_ALIGN (1ULL << MASK_SHIFT)
#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
#define MASK_GET_IDX(idx, mod) (((idx) << MASK_SHIFT) + (mod))
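/*
 * Worked example (illustrative): element index 70 maps to mask word
 * MASK_LEN_TO_IDX(70) == 1 and bit MASK_LEN_TO_MOD(70) == 6, i.e. bit 6 of
 * the second 64-bit word, and MASK_GET_IDX(1, 6) recovers index 70.
 */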
/*
 * This is a mask that is always stored at the end of the array, to provide a
 * fast way of finding free/used spots without looping through each element.
 */
struct used_mask {
	unsigned int n_masks;
	uint64_t data[];
};

static size_t
calc_mask_size(unsigned int len)
{
	/* mask must be a multiple of MASK_ALIGN, even though length of array
	 * itself may not be aligned on that boundary.
	 */
	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
	return sizeof(struct used_mask) +
			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
}
static size_t
calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
{
	size_t data_sz = elt_sz * len;
	size_t msk_sz = calc_mask_size(len);
	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
}
static struct used_mask *
get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
{
	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
}
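/*
 * Layout note: the backing memory therefore holds the element data first,
 * immediately followed by the used_mask bookkeeping structure, and
 * calc_data_size() rounds the total up to a multiple of the page size.
 */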
static int
resize_and_map(int fd, void *addr, size_t len)
{
	void *map_addr;

	if (ftruncate(fd, len)) {
		RTE_LOG(ERR, EAL, "Cannot truncate file: %s\n",
				strerror(errno));
		/* pass errno up the chain */
		rte_errno = errno;
		return -1;
	}

	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, 0);
	if (map_addr != addr) {
		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
		/* pass errno up the chain */
		rte_errno = errno;
		return -1;
	}
	return 0;
}
static int
find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int msk_idx, lookahead_idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1);
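	/*
	 * Example (illustrative): start == 70 gives first == 1 and
	 * first_mod == 6, so ignore_msk clears bits 0-5 of mask word 1 and
	 * elements 64-69 are excluded from the search.
	 */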
	/* array length may not be aligned, so calculate ignore mask for the
	 * last mask, to ignore everything past the end of the array.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-1ULL << last_mod);
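	/*
	 * Example (illustrative): arr->len == 70 gives last == 1 and
	 * last_mod == 6, so last_msk == 0x3f and only bits 0-5 of the final
	 * mask word (elements 64-69) are considered valid.
	 */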
	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
		uint64_t cur_msk, lookahead_msk;
		unsigned int run_start, clz, left;

		/*
		 * The process of getting n consecutive bits for arbitrary n is
		 * a bit involved, but here it is in a nutshell:
		 *
		 * 1. let n be the number of consecutive bits we're looking for
		 * 2. check if n can fit in one mask, and if so, do n-1
		 *    rshift-ands to see if there is an appropriate run inside
		 *    our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 * 3. invert the mask and count leading zeroes (that is, count
		 *    how many consecutive set bits we had starting from the
		 *    end of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 * 4. to satisfy our requirements, next mask must have n-k
		 *    consecutive set bits right at the start, so we will do
		 *    (n-k-1) rshift-ands and check if first bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for.
		 */
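		/*
		 * Example of step 2 (illustrative): with n == 3 and a mask of
		 * 0b0111000, two rshift-ands leave 0b0001000; ctz of that is 3,
		 * so a run of three set bits starts at bit 3.
		 */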
		cur_msk = msk->data[msk_idx];

		/* if we're looking for free spaces, invert the mask */

		/* combine current ignore mask with last index ignore mask */
			ignore_msk |= last_msk;

		/* if we have an ignore mask, ignore once */
			cur_msk &= ignore_msk;

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;
			unsigned int s_idx;

			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk >> 1ULL;
			/* we found what we were looking for */
				run_start = __builtin_ctzll(tmp_msk);
				return MASK_GET_IDX(msk_idx, run_start);
		}

		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count leading zeroes on inverted mask */
			clz = sizeof(cur_msk) * 8;
			clz = __builtin_clzll(~cur_msk);

		/* if there aren't any runs at the end either, just continue */

		/* we have a partial run at the end, so try looking ahead */
		run_start = MASK_ALIGN - clz;

		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
				lookahead_idx++) {
			unsigned int s_idx, need;

			lookahead_msk = msk->data[lookahead_idx];

			/* if we're looking for free space, invert the mask */
				lookahead_msk = ~lookahead_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookahead_msk &= lookahead_msk >> 1ULL;

			/* if first bit is not set, we've lost the run */
			if ((lookahead_msk & 1) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookahead-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = ~((1ULL << need) - 1);
				msk_idx = lookahead_idx;
			}

			/* check if we've found what we were looking for */
		}

		/* we didn't find anything, so continue */

		return MASK_GET_IDX(msk_idx, run_start);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
static int
find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1ULL);

	/* array length may not be aligned, so calculate ignore mask for the
	 * last mask, to ignore everything past the end of the array.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	for (idx = first; idx < msk->n_masks; idx++) {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */

		/* ignore everything before start on first iteration */

		/* check if we have any entries */

		/*
		 * find first set bit - that will correspond to whatever it is
		 * that we're looking for.
		 */
		found = __builtin_ctzll(cur);
		return MASK_GET_IDX(idx, found);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
static int
find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk;
	unsigned int need_len, result = 0;

	/* array length may not be aligned, so calculate ignore mask for the
	 * last mask, to ignore everything past the end of the array.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
		uint64_t cur = msk->data[idx];
		unsigned int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */

		/* if this is last mask, ignore everything after last bit */

		/* ignore everything before start on first iteration */
			/* at the start, we don't need the full mask len */
			need_len -= first_mod;

		/* we will be looking for zeroes, so invert the mask */

		/* if mask is zero, we have a complete run */

		/*
		 * see if current run ends before mask end.
		 */
		run_len = __builtin_ctzll(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
	}
	return result;
}
static int
find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	uint64_t ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing clz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	/* we're going backwards, so mask must start from the top */
	ignore_msk = first_mod == MASK_ALIGN - 1 ?
			-1ULL : /* prevent overflow */
			~(-1ULL << (first_mod + 1));
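	/*
	 * Example (illustrative): start == 63 gives first_mod == 63; computing
	 * ~(-1ULL << 64) would be undefined (shift by the full word width), so
	 * the special case above returns an all-ones mask instead.
	 */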
	/* go backwards, include zero */
	idx = first;
	do {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */

		/* ignore everything before start on first iteration */

		/* check if we have any entries */

		/*
		 * find last set bit - that will correspond to whatever it is
		 * that we're looking for. we're counting leading zeroes, so the
		 * value we get is counted from the top of the mask; calculate
		 * the position from the start of the mask.
		 */
		found = MASK_ALIGN - __builtin_clzll(cur) - 1;
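		/*
		 * Example (illustrative): if cur == 0x10 (only bit 4 set),
		 * __builtin_clzll(cur) == 59 and found == 64 - 59 - 1 == 4.
		 */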
		return MASK_GET_IDX(idx, found);
	} while (idx-- != 0); /* decrement after check to include zero */

	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
	struct used_mask *msk;
	uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
	unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
	bool already_used;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);

	/* prevent array from changing under us */
	rte_rwlock_write_lock(&arr->rwlock);

	already_used = (msk->data[msk_idx] & msk_bit) != 0;

	/* nothing to be done */
	if (used == already_used)
		goto out;

	if (used) {
		msk->data[msk_idx] |= msk_bit;
		arr->count++;
	} else {
		msk->data[msk_idx] &= ~msk_bit;
		arr->count--;
	}
out:
	rte_rwlock_write_unlock(&arr->rwlock);

	return 0;
}
static int
fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
{
	if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
		rte_errno = EINVAL;
		return -1;
	}

	if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	return 0;
}
int __rte_experimental
rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
		unsigned int elt_sz)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	struct used_mask *msk;
	void *data = NULL;
	int fd = -1;

	if (fully_validate(name, elt_sz, len))
		return -1;

	page_sz = sysconf(_SC_PAGESIZE);
	if (page_sz == (size_t)-1)
		return -1;

	/* calculate our memory limits */
	mmap_len = calc_data_size(page_sz, elt_sz, len);

	data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		return -1;

	eal_get_fbarray_path(path, sizeof(path), name);

	/*
	 * Each fbarray is unique to process namespace, i.e. the filename
	 * depends on process prefix. Try to take out a lock and see if we
	 * succeed. If we don't, someone else is using it already.
	 */
	fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
				path, strerror(errno));
		goto fail;
	} else if (flock(fd, LOCK_EX | LOCK_NB)) {
		RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
				path, strerror(errno));
		goto fail;
	}

	/* take out a non-exclusive lock, so that other processes could still
	 * attach to it, but no other process could reinitialize it.
	 */
	if (flock(fd, LOCK_SH | LOCK_NB)) {
		goto fail;
	}

	if (resize_and_map(fd, data, mmap_len))
		goto fail;

	/* we've mmap'ed the file, we can now close the fd */
	close(fd);

	/* initialize the data */
	memset(data, 0, mmap_len);

	/* populate data structure */
	strlcpy(arr->name, name, sizeof(arr->name));
	arr->data = data;
	arr->len = len;
	arr->elt_sz = elt_sz;
	arr->count = 0;

	msk = get_used_mask(data, elt_sz, len);
	msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
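	/*
	 * Example (illustrative): len == 100 is rounded up to 128, so the
	 * array uses two 64-bit mask words (n_masks == 2).
	 */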
	rte_rwlock_init(&arr->rwlock);

	return 0;
fail:
	munmap(data, mmap_len);
	close(fd);
	return -1;
}
int __rte_experimental
rte_fbarray_attach(struct rte_fbarray *arr)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	void *data = NULL;
	int fd = -1;

	/*
	 * we don't need to synchronize attach as two values we need (element
	 * size and array length) are constant for the duration of life of
	 * the array, so the parts we care about will not race.
	 */

	if (fully_validate(arr->name, arr->elt_sz, arr->len))
		return -1;

	page_sz = sysconf(_SC_PAGESIZE);
	if (page_sz == (size_t)-1)
		return -1;

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		return -1;

	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = open(path, O_RDWR);
	if (fd < 0)
		goto fail;

	/* lock the file, to let others know we're using it */
	if (flock(fd, LOCK_SH | LOCK_NB)) {
		goto fail;
	}

	if (resize_and_map(fd, data, mmap_len))
		goto fail;

	return 0;
fail:
	munmap(data, mmap_len);
	close(fd);
	return -1;
}
int __rte_experimental
rte_fbarray_detach(struct rte_fbarray *arr)
{
	/*
	 * we don't need to synchronize detach as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * detaching while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = sysconf(_SC_PAGESIZE);

	if (page_sz == (size_t)-1)
		return -1;

	/* this may already be unmapped (e.g. repeated call from previously
	 * failed destroy()), but this is on user, we can't (easily) know if
	 * this is still mapped.
	 */
	munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));

	return 0;
}
int __rte_experimental
rte_fbarray_destroy(struct rte_fbarray *arr)
{
	char path[PATH_MAX];
	int fd, ret;

	ret = rte_fbarray_detach(arr);
	if (ret)
		return ret;

	/* try deleting the file */
	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n",
			strerror(errno));
		return -1;
	}
	if (flock(fd, LOCK_EX | LOCK_NB)) {
		RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
		rte_errno = EBUSY;
		ret = -1;
	} else {
		ret = 0;
		unlink(path);
		memset(arr, 0, sizeof(*arr));
	}
	close(fd);

	return ret;
}
void * __rte_experimental
rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
{
	void *ret;

	if (idx >= arr->len) {
		rte_errno = EINVAL;
		return NULL;
	}

	ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);

	return ret;
}
int __rte_experimental
rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, true);
}

int __rte_experimental
rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, false);
}
int __rte_experimental
rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
{
	struct used_mask *msk;
	uint64_t msk_bit;
	unsigned int msk_idx;
	int ret;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	msk_idx = MASK_LEN_TO_IDX(idx);
	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);

	ret = (msk->data[msk_idx] & msk_bit) != 0;

	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
static int
fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (used) {
		if (arr->len == arr->count) {
			ret = start;
			goto out;
		}
		if (arr->count == 0) {
			rte_errno = ENOENT;
			goto out;
		}
	} else {
		if (arr->count == 0) {
			ret = start;
			goto out;
		}
		if (arr->len == arr->count) {
			rte_errno = ENOSPC;
			goto out;
		}
	}

	if (next)
		ret = find_next(arr, start, used);
	else
		ret = find_prev(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, false);
}

int __rte_experimental
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, true);
}

int __rte_experimental
rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, false);
}

int __rte_experimental
rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, true);
}
static int
fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
		rte_errno = EINVAL;
		return -1;
	}
	if (arr->len - start < n) {
		rte_errno = used ? ENOENT : ENOSPC;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (!used) {
		if (arr->len == arr->count || arr->len - arr->count < n) {
			rte_errno = ENOSPC;
			goto out;
		}
		if (arr->count == 0) {
			ret = start;
			goto out;
		}
	} else {
		if (arr->count < n) {
			rte_errno = ENOENT;
			goto out;
		}
		if (arr->count == arr->len) {
			ret = start;
			goto out;
		}
	}

	ret = find_next_n(arr, start, n, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, false);
}

int __rte_experimental
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, true);
}
static int
fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (used) {
		if (arr->count == 0) {
			ret = 0;
			goto out;
		}
		if (arr->len == arr->count) {
			ret = arr->len - start;
			goto out;
		}
	} else {
		if (arr->len == arr->count) {
			ret = 0;
			goto out;
		}
		if (arr->count == 0) {
			ret = arr->len - start;
			goto out;
		}
	}

	ret = find_contig(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}
int __rte_experimental
rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, false);
}

int __rte_experimental
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, true);
}
int __rte_experimental
rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
{
	void *end;
	int ret = -1;

	/*
	 * no need to synchronize as it doesn't matter if underlying data
	 * changes - we're doing pointer arithmetic here.
	 */

	if (arr == NULL || elt == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
	if (elt < arr->data || elt >= end) {
		rte_errno = EINVAL;
		return -1;
	}

	ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;

	return ret;
}
void __rte_experimental
rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
{
	struct used_mask *msk;
	unsigned int i;

	if (arr == NULL || f == NULL) {
		rte_errno = EINVAL;
		return;
	}

	if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
		fprintf(f, "Invalid file-backed array\n");
		return;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	fprintf(f, "File-backed array: %s\n", arr->name);
	fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
			arr->len, arr->count, arr->elt_sz);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);

	for (i = 0; i < msk->n_masks; i++)
		fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);

	rte_rwlock_read_unlock(&arr->rwlock);
}
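/*
 * Minimal usage sketch (illustrative only, not part of this file; the array
 * name "example" and the element type are hypothetical):
 *
 *	struct elem { int x; };
 *	struct rte_fbarray a;
 *	int idx;
 *
 *	if (rte_fbarray_init(&a, "example", 256, sizeof(struct elem)) < 0)
 *		return -1;
 *	idx = rte_fbarray_find_next_free(&a, 0);
 *	if (idx >= 0) {
 *		struct elem *e = rte_fbarray_get(&a, idx);
 *		e->x = 42;
 *		rte_fbarray_set_used(&a, idx);
 *	}
 *	rte_fbarray_destroy(&a);
 */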