/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <rte_malloc.h>

#include <mlx5_malloc.h>

#include "mlx5_utils.h"
/********************* MLX5 list ************************/

static struct mlx5_list_entry *
mlx5_list_default_create_cb(struct mlx5_list *list,
                            struct mlx5_list_entry *entry __rte_unused,
                            void *ctx __rte_unused)
{
        return mlx5_malloc(MLX5_MEM_ZERO, list->entry_sz, 0, SOCKET_ID_ANY);
}

static void
mlx5_list_default_remove_cb(struct mlx5_list *list __rte_unused,
                            struct mlx5_list_entry *entry)
{
        mlx5_free(entry);
}
mlx5_list_create(struct mlx5_list *list, const char *name,
                 uint32_t entry_size, void *ctx,
                 mlx5_list_create_cb cb_create,
                 mlx5_list_match_cb cb_match,
                 mlx5_list_remove_cb cb_remove)
{
        if (!cb_match || (!cb_create ^ !cb_remove))
        snprintf(list->name, sizeof(list->name), "%s", name);
        list->entry_sz = entry_size;
        list->cb_create = cb_create ? cb_create : mlx5_list_default_create_cb;
        list->cb_match = cb_match;
        list->cb_remove = cb_remove ? cb_remove : mlx5_list_default_remove_cb;
        rte_rwlock_init(&list->lock);
        DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
        LIST_INIT(&list->head);
static struct mlx5_list_entry *
__list_lookup(struct mlx5_list *list, void *ctx, bool reuse)
{
        struct mlx5_list_entry *entry;

        LIST_FOREACH(entry, &list->head, next) {
                if (list->cb_match(list, entry, ctx))
                __atomic_add_fetch(&entry->ref_cnt, 1,
                DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
                        list->name, (void *)entry, entry->ref_cnt);
static struct mlx5_list_entry *
list_lookup(struct mlx5_list *list, void *ctx, bool reuse)
{
        struct mlx5_list_entry *entry;

        rte_rwlock_read_lock(&list->lock);
        entry = __list_lookup(list, ctx, reuse);
        rte_rwlock_read_unlock(&list->lock);
        return entry;
}

struct mlx5_list_entry *
mlx5_list_lookup(struct mlx5_list *list, void *ctx)
{
        return list_lookup(list, ctx, false);
}
struct mlx5_list_entry *
mlx5_list_register(struct mlx5_list *list, void *ctx)
{
        struct mlx5_list_entry *entry;
        uint32_t prev_gen_cnt = 0;

        prev_gen_cnt = __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE);
        /* Lookup with read lock, reuse the entry if found. */
        entry = list_lookup(list, ctx, true);
        /* Not found, append with write lock - block reads from other threads. */
        rte_rwlock_write_lock(&list->lock);
        /* If the list was changed by other threads before the lock, search again. */
        if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
                /* Lookup and reuse without the read lock. */
                entry = __list_lookup(list, ctx, true);
        entry = list->cb_create(list, entry, ctx);
        DRV_LOG(ERR, "Failed to init mlx5 list %s entry %p.",
                list->name, (void *)entry);
        LIST_INSERT_HEAD(&list->head, entry, next);
        __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
        __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
        DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
                list->name, (void *)entry, entry->ref_cnt);
        rte_rwlock_write_unlock(&list->lock);
mlx5_list_unregister(struct mlx5_list *list,
                     struct mlx5_list_entry *entry)
{
        rte_rwlock_write_lock(&list->lock);
        MLX5_ASSERT(entry && entry->next.le_prev);
        DRV_LOG(DEBUG, "mlx5 list %s entry %p ref--: %u.",
                list->name, (void *)entry, entry->ref_cnt);
        if (--entry->ref_cnt) {
                rte_rwlock_write_unlock(&list->lock);
        __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
        __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
        LIST_REMOVE(entry, next);
        list->cb_remove(list, entry);
        rte_rwlock_write_unlock(&list->lock);
        DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
                list->name, (void *)entry);
mlx5_list_destroy(struct mlx5_list *list)
{
        struct mlx5_list_entry *entry;

        /* No LIST_FOREACH_SAFE available, use a while loop instead. */
        while (!LIST_EMPTY(&list->head)) {
                entry = LIST_FIRST(&list->head);
                LIST_REMOVE(entry, next);
                list->cb_remove(list, entry);
                DRV_LOG(DEBUG, "mlx5 list %s entry %p destroyed.",
                        list->name, (void *)entry);
        }
        memset(list, 0, sizeof(*list));
}

uint32_t
mlx5_list_get_entry_num(struct mlx5_list *list)
{
        return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
}
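/*
 * Illustrative usage sketch (not part of the driver). Only the mlx5_list_*
 * API above is real; the match callback, key type and list name below are
 * hypothetical, and a real user would normally also supply create/remove
 * callbacks that build the entry from 'ctx'.
 *
 *      static int
 *      my_match_cb(struct mlx5_list *list, struct mlx5_list_entry *entry,
 *                  void *ctx)
 *      {
 *              // A zero return is treated as a match by __list_lookup().
 *              return memcmp(entry + 1, ctx, sizeof(struct my_key));
 *      }
 *
 *      struct mlx5_list my_list;
 *      struct my_key key = {0};
 *      struct mlx5_list_entry *e;
 *
 *      mlx5_list_create(&my_list, "my_list",
 *                       sizeof(*e) + sizeof(key), NULL,
 *                       NULL, my_match_cb, NULL);
 *      e = mlx5_list_register(&my_list, &key);    // create or reuse an entry
 *      if (e)
 *              mlx5_list_unregister(&my_list, e); // drop the reference again
 *      mlx5_list_destroy(&my_list);
 */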
/********************* Indexed pool **********************/

static inline void
mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
{
        if (pool->cfg.need_lock)
                rte_spinlock_lock(&pool->rsz_lock);
}

static inline void
mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
{
        if (pool->cfg.need_lock)
                rte_spinlock_unlock(&pool->rsz_lock);
}
static inline uint32_t
mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;
        uint32_t trunk_idx = 0;
        uint32_t i;

        if (!cfg->grow_trunk)
                return entry_idx / cfg->trunk_size;
        if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
                trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
                            (cfg->trunk_size << (cfg->grow_shift *
                            cfg->grow_trunk)) + cfg->grow_trunk;
        } else {
                for (i = 0; i < cfg->grow_trunk; i++) {
                        if (entry_idx < pool->grow_tbl[i])
static inline uint32_t
mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;

        return cfg->trunk_size << (cfg->grow_shift *
               (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
}

static inline uint32_t
mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;
        uint32_t offset = 0;

        if (!cfg->grow_trunk)
                return cfg->trunk_size * trunk_idx;
        if (trunk_idx < cfg->grow_trunk)
                offset = pool->grow_tbl[trunk_idx - 1];
        else
                offset = pool->grow_tbl[cfg->grow_trunk - 1] +
                         (cfg->trunk_size << (cfg->grow_shift *
                         cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
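/*
 * Worked example of the trunk growth math above (illustrative values only:
 * trunk_size = 16, grow_shift = 1, grow_trunk = 3).
 *
 *   mlx5_trunk_size_get():  trunk 0 -> 16, trunk 1 -> 32, trunk 2 -> 64,
 *                           trunk 3 and later -> 16 << (1 * 3) = 128.
 *   grow_tbl[] (cumulative sizes): {16, 48, 112}.
 *   mlx5_trunk_idx_offset_get(): trunk 3 starts at 112, trunk 4 at 240, ...
 *   mlx5_trunk_idx_get(): entry 200 >= 112, so trunk = (200 - 112) / 128 + 3
 *                         = 3, at offset 200 - 112 = 88 inside that trunk.
 */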
struct mlx5_indexed_pool *
mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
{
        struct mlx5_indexed_pool *pool;
        uint32_t i;

        if (!cfg || (!cfg->malloc ^ !cfg->free) ||
            (cfg->per_core_cache && cfg->release_mem_en) ||
            (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
            ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
                return NULL;
        pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
                           sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
        if (!pool->cfg.trunk_size)
                pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
        if (!cfg->malloc && !cfg->free) {
                pool->cfg.malloc = mlx5_malloc;
                pool->cfg.free = mlx5_free;
        }
        if (pool->cfg.need_lock)
                rte_spinlock_init(&pool->rsz_lock);
        /*
         * Initialize the dynamic grow trunk size lookup table for a quick
         * lookup of the trunk entry index offset.
         */
        for (i = 0; i < cfg->grow_trunk; i++) {
                pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
                pool->grow_tbl[i] += pool->grow_tbl[i - 1];
        }
        if (!pool->cfg.max_idx)
                pool->cfg.max_idx =
                        mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
        if (!cfg->per_core_cache)
                pool->free_list = TRUNK_INVALID;
        rte_spinlock_init(&pool->lcore_lock);
        return pool;
}
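/*
 * Illustrative configuration sketch (not part of the driver). The field
 * values and the object type are hypothetical; only the structure fields
 * and mlx5_ipool_create() come from the code above.
 *
 *      struct mlx5_indexed_pool_config cfg = {
 *              .size = sizeof(struct my_obj),  // hypothetical object type
 *              .trunk_size = 64,               // must be a power of two
 *              .grow_trunk = 3,
 *              .grow_shift = 2,
 *              .need_lock = 1,
 *              .release_mem_en = 1,
 *              .malloc = mlx5_malloc,
 *              .free = mlx5_free,
 *              .type = "my_obj_ipool",
 *      };
 *      struct mlx5_indexed_pool *pool = mlx5_ipool_create(&cfg);
 *
 * Note: per_core_cache and release_mem_en are mutually exclusive, and
 * trunk_size must keep __builtin_ffs(trunk_size) + TRUNK_IDX_BITS within
 * 32 bits (see the validation at the top of mlx5_ipool_create()).
 */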
mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
{
        struct mlx5_indexed_trunk *trunk;
        struct mlx5_indexed_trunk **trunk_tmp;
        struct mlx5_indexed_trunk **p;
        size_t trunk_size = 0;
        size_t data_size;
        size_t bmp_size;
        uint32_t idx, cur_max_idx, i;

        cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
        if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
            cur_max_idx >= pool->cfg.max_idx)
        if (pool->n_trunk_valid == pool->n_trunk) {
                /* No free slot left in the trunk list, expand it. */
                int n_grow = pool->n_trunk_valid ? pool->n_trunk :
                             RTE_CACHE_LINE_SIZE / sizeof(void *);

                p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
                                     sizeof(struct mlx5_indexed_trunk *),
                                     RTE_CACHE_LINE_SIZE, rte_socket_id());
                memcpy(p, pool->trunks, pool->n_trunk_valid *
                       sizeof(struct mlx5_indexed_trunk *));
                memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
                       n_grow * sizeof(void *));
                trunk_tmp = pool->trunks;
                pool->cfg.free(trunk_tmp);
                pool->n_trunk += n_grow;
        }
        if (!pool->cfg.release_mem_en) {
                idx = pool->n_trunk_valid;
        } else {
                /* Find the first available slot in the trunk list. */
                for (idx = 0; idx < pool->n_trunk; idx++)
                        if (pool->trunks[idx] == NULL)
                                break;
        }
        trunk_size += sizeof(*trunk);
        data_size = mlx5_trunk_size_get(pool, idx);
        bmp_size = rte_bitmap_get_memory_footprint(data_size);
        /* rte_bitmap requires cacheline-aligned memory. */
        trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
        trunk_size += bmp_size;
        trunk = pool->cfg.malloc(0, trunk_size,
                                 RTE_CACHE_LINE_SIZE, rte_socket_id());
        pool->trunks[idx] = trunk;
        trunk->free = data_size;
        trunk->prev = TRUNK_INVALID;
        trunk->next = TRUNK_INVALID;
        MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
        pool->free_list = idx;
        /* Mark all entries as available. */
        trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
                     [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
                     bmp_size);
        /* Clear the bits beyond the pool's maximum index, if any. */
        if (cur_max_idx + data_size > pool->cfg.max_idx) {
                for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
                        rte_bitmap_clear(trunk->bmp, i);
        }
        MLX5_ASSERT(trunk->bmp);
        pool->n_trunk_valid++;
static inline struct mlx5_indexed_cache *
mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
{
        struct mlx5_indexed_cache *gc, *lc, *olc = NULL;

        lc = pool->cache[cidx]->lc;
        gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
        if (gc && lc != gc) {
                mlx5_ipool_lock(pool);
                if (lc && !(--lc->ref_cnt))
                pool->cache[cidx]->lc = lc;
                mlx5_ipool_unlock(pool);
mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
{
        struct mlx5_indexed_trunk *trunk;
        struct mlx5_indexed_cache *p, *lc, *olc = NULL;
        size_t trunk_size = 0;
        uint32_t cur_max_idx, trunk_idx, trunk_n;
        uint32_t fetch_size, ts_idx, i;

        /*
         * Fetch a new index from the global cache if possible. On the first
         * round the local cache will be NULL.
         */
        lc = pool->cache[cidx]->lc;
        mlx5_ipool_lock(pool);
        /* Try to update the local cache first. */
        if (likely(pool->gc)) {
                if (lc != pool->gc) {
                        if (lc && !(--lc->ref_cnt))
                        pool->cache[cidx]->lc = lc;
                }
                /* Use the updated local cache to fetch indices. */
                fetch_size = pool->cfg.per_core_cache >> 2;
                if (lc->len < fetch_size)
                        fetch_size = lc->len;
                lc->len -= fetch_size;
                memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
                       sizeof(uint32_t) * fetch_size);
        }
        mlx5_ipool_unlock(pool);
        pool->cache[cidx]->len = fetch_size - 1;
        return pool->cache[cidx]->idx[pool->cache[cidx]->len];
        trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
                         __ATOMIC_ACQUIRE) : 0;
        trunk_n = lc ? lc->n_trunk : 0;
        cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
        /* Check if the index reaches the maximum. */
        if (trunk_idx == TRUNK_MAX_IDX ||
            cur_max_idx >= pool->cfg.max_idx)
        /* Not enough space in the trunk array, resize it. */
        if (trunk_idx == trunk_n) {
                n_grow = trunk_idx ? trunk_idx :
                         RTE_CACHE_LINE_SIZE / sizeof(void *);
                cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
                /* Resize the trunk array. */
                p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
                                     sizeof(struct mlx5_indexed_trunk *)) +
                                     (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
                                     RTE_CACHE_LINE_SIZE, rte_socket_id());
                p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
                memcpy(p->trunks, lc->trunks, trunk_idx *
                       sizeof(struct mlx5_indexed_trunk *));
#ifdef RTE_LIBRTE_MLX5_DEBUG
                memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
                       n_grow * sizeof(void *));
#endif
                p->n_trunk_valid = trunk_idx;
                p->n_trunk = trunk_n + n_grow;
        }
        /* Prepare the new trunk. */
        trunk_size = sizeof(*trunk);
        data_size = mlx5_trunk_size_get(pool, trunk_idx);
        trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
        trunk = pool->cfg.malloc(0, trunk_size,
                                 RTE_CACHE_LINE_SIZE, rte_socket_id());
        if (unlikely(!trunk)) {
        trunk->idx = trunk_idx;
        trunk->free = data_size;
        mlx5_ipool_lock(pool);
        /*
         * Double check whether the trunks have been updated or an index has
         * become available. While the new trunk was being allocated, indices
         * may have been flushed to the global cache, so pool->gc->len must be
         * checked as well.
         */
        if (pool->gc && (lc != pool->gc ||
            lc->n_trunk_valid != trunk_idx ||
                mlx5_ipool_unlock(pool);
                pool->cfg.free(trunk);
        /* Resize the trunk array and update the local cache first. */
        if (lc && !(--lc->ref_cnt))
        pool->cache[cidx]->lc = lc;
        __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
        /* Add the trunk to the trunk array. */
        lc->trunks[trunk_idx] = trunk;
        __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
        /* Enqueue half of the new indices to the global cache. */
        ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
        fetch_size = trunk->free >> 1;
        for (i = 0; i < fetch_size; i++)
                lc->idx[i] = ts_idx + i;
        lc->len = fetch_size;
        mlx5_ipool_unlock(pool);
        /* Copy the remaining half, minus one entry, to the local cache. */
        pool->cache[cidx]->len = trunk->free - fetch_size - 1;
        ts_idx += fetch_size;
        for (i = 0; i < pool->cache[cidx]->len; i++)
                pool->cache[cidx]->idx[i] = ts_idx + i;
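/*
 * Illustrative numbers for the split above (hypothetical trunk of 64
 * entries): trunk->free = 64, so fetch_size = 32 indices are published to
 * the global cache (lc->idx), the per-lcore cache keeps 64 - 32 - 1 = 31
 * indices, leaving one index to be handed straight back to the caller.
 */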
static void *
_mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
{
        struct mlx5_indexed_trunk *trunk;
        struct mlx5_indexed_cache *lc;
        uint32_t entry_idx, trunk_idx;

        if (unlikely(!pool->cache[cidx])) {
                pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
                                    sizeof(struct mlx5_ipool_per_lcore) +
                                    (pool->cfg.per_core_cache * sizeof(uint32_t)),
                                    RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!pool->cache[cidx]) {
                        DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
        lc = mlx5_ipool_update_global_cache(pool, cidx);
        trunk_idx = mlx5_trunk_idx_get(pool, idx);
        trunk = lc->trunks[trunk_idx];
        entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
        return &trunk->data[entry_idx * pool->cfg.size];
}
static void *
mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        void *entry;
        int cidx;

        cidx = rte_lcore_index(rte_lcore_id());
        if (unlikely(cidx == -1)) {
                cidx = RTE_MAX_LCORE;
                rte_spinlock_lock(&pool->lcore_lock);
        }
        entry = _mlx5_ipool_get_cache(pool, cidx, idx);
        if (unlikely(cidx == RTE_MAX_LCORE))
                rte_spinlock_unlock(&pool->lcore_lock);
        return entry;
}
static void *
_mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
                         uint32_t *idx)
{
        if (unlikely(!pool->cache[cidx])) {
                pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
                                    sizeof(struct mlx5_ipool_per_lcore) +
                                    (pool->cfg.per_core_cache * sizeof(uint32_t)),
                                    RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!pool->cache[cidx]) {
                        DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
        } else if (pool->cache[cidx]->len) {
                pool->cache[cidx]->len--;
                *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
                return _mlx5_ipool_get_cache(pool, cidx, *idx);
        }
        /* No free index in the local cache, allocate from the global pool. */
        *idx = mlx5_ipool_allocate_from_global(pool, cidx);
        if (unlikely(!(*idx)))
                return NULL;
        return _mlx5_ipool_get_cache(pool, cidx, *idx);
}
static void *
mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
        void *entry;
        int cidx;

        cidx = rte_lcore_index(rte_lcore_id());
        if (unlikely(cidx == -1)) {
                cidx = RTE_MAX_LCORE;
                rte_spinlock_lock(&pool->lcore_lock);
        }
        entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
        if (unlikely(cidx == RTE_MAX_LCORE))
                rte_spinlock_unlock(&pool->lcore_lock);
        return entry;
}
static void
_mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
{
        struct mlx5_ipool_per_lcore *ilc;
        struct mlx5_indexed_cache *gc, *olc = NULL;
        uint32_t reclaim_num = 0;

        /*
         * An index may be allocated on core A but freed on core B. In that
         * case, check whether the local cache on core B has been allocated.
         */
        if (unlikely(!pool->cache[cidx])) {
                pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
                                    sizeof(struct mlx5_ipool_per_lcore) +
                                    (pool->cfg.per_core_cache * sizeof(uint32_t)),
                                    RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!pool->cache[cidx]) {
                        DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
        /* Try to enqueue the index into the local index cache. */
        if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
                pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
                pool->cache[cidx]->len++;
                return;
        }
        ilc = pool->cache[cidx];
        reclaim_num = pool->cfg.per_core_cache >> 2;
        ilc->len -= reclaim_num;
        /* Local index cache is full, flush part of it to the global cache. */
        mlx5_ipool_lock(pool);
        if (!(--ilc->lc->ref_cnt))
        memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
               reclaim_num * sizeof(uint32_t));
        gc->len += reclaim_num;
        mlx5_ipool_unlock(pool);
        pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
        pool->cache[cidx]->len++;
}
static void
mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        int cidx;

        cidx = rte_lcore_index(rte_lcore_id());
        if (unlikely(cidx == -1)) {
                cidx = RTE_MAX_LCORE;
                rte_spinlock_lock(&pool->lcore_lock);
        }
        _mlx5_ipool_free_cache(pool, cidx, idx);
        if (unlikely(cidx == RTE_MAX_LCORE))
                rte_spinlock_unlock(&pool->lcore_lock);
}
void *
mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
        struct mlx5_indexed_trunk *trunk;
        uint64_t slab = 0;
        uint32_t iidx = 0;
        void *p;

        if (pool->cfg.per_core_cache)
                return mlx5_ipool_malloc_cache(pool, idx);
        mlx5_ipool_lock(pool);
        if (pool->free_list == TRUNK_INVALID) {
                /* If no trunk is available, grow a new one. */
                if (mlx5_ipool_grow(pool)) {
                        mlx5_ipool_unlock(pool);
        MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
        trunk = pool->trunks[pool->free_list];
        MLX5_ASSERT(trunk->free);
        if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
                mlx5_ipool_unlock(pool);
        iidx += __builtin_ctzll(slab);
        MLX5_ASSERT(iidx != UINT32_MAX);
        MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
        rte_bitmap_clear(trunk->bmp, iidx);
        p = &trunk->data[iidx * pool->cfg.size];
        /*
         * The ipool index should grow continuously from small to big; some
         * features, such as metering, only accept a limited number of index
         * bits, so a random index with the MSB set may be rejected.
         */
        iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
        iidx += 1; /* non-zero index. */
        /* A trunk that became full is removed from the free list here. */
        MLX5_ASSERT(pool->free_list == trunk->idx);
        pool->free_list = trunk->next;
        if (trunk->next != TRUNK_INVALID)
                pool->trunks[trunk->next]->prev = TRUNK_INVALID;
        trunk->prev = TRUNK_INVALID;
        trunk->next = TRUNK_INVALID;
        mlx5_ipool_unlock(pool);
void *
mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
        void *entry = mlx5_ipool_malloc(pool, idx);

        if (entry && pool->cfg.size)
                memset(entry, 0, pool->cfg.size);
        return entry;
}
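/*
 * Illustrative allocation flow (not part of the driver); 'pool' and
 * 'struct my_obj' are assumed to come from the hypothetical configuration
 * sketched after mlx5_ipool_create() above.
 *
 *      uint32_t idx;
 *      struct my_obj *obj = mlx5_ipool_zmalloc(pool, &idx);
 *
 *      if (!obj)
 *              return;                       // pool exhausted or OOM
 *      // idx is 1-based; 0 is never used for a valid entry.
 *      obj = mlx5_ipool_get(pool, idx);      // translate index back to pointer
 *      mlx5_ipool_free(pool, idx);           // return the entry to the pool
 */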
void
mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        struct mlx5_indexed_trunk *trunk;
        uint32_t entry_idx, trunk_idx;

        if (pool->cfg.per_core_cache) {
                mlx5_ipool_free_cache(pool, idx);
                return;
        }
        mlx5_ipool_lock(pool);
        trunk_idx = mlx5_trunk_idx_get(pool, idx);
        if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
            (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
        trunk = pool->trunks[trunk_idx];
        entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
        if (trunk_idx != trunk->idx ||
            rte_bitmap_get(trunk->bmp, entry_idx))
        rte_bitmap_set(trunk->bmp, entry_idx);
        if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
           (pool, trunk->idx)) {
                if (pool->free_list == trunk->idx)
                        pool->free_list = trunk->next;
                if (trunk->next != TRUNK_INVALID)
                        pool->trunks[trunk->next]->prev = trunk->prev;
                if (trunk->prev != TRUNK_INVALID)
                        pool->trunks[trunk->prev]->next = trunk->next;
                pool->cfg.free(trunk);
                pool->trunks[trunk_idx] = NULL;
                pool->n_trunk_valid--;
                if (pool->n_trunk_valid == 0) {
                        pool->cfg.free(pool->trunks);
        } else if (trunk->free == 1) {
                /* Put the trunk at the head of the free trunk list. */
                MLX5_ASSERT(pool->free_list != trunk->idx);
                trunk->next = pool->free_list;
                trunk->prev = TRUNK_INVALID;
                if (pool->free_list != TRUNK_INVALID)
                        pool->trunks[pool->free_list]->prev = trunk->idx;
                pool->free_list = trunk->idx;
        }
        mlx5_ipool_unlock(pool);
}
void *
mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        struct mlx5_indexed_trunk *trunk;
        void *p = NULL;
        uint32_t entry_idx, trunk_idx;

        if (pool->cfg.per_core_cache)
                return mlx5_ipool_get_cache(pool, idx);
        mlx5_ipool_lock(pool);
        trunk_idx = mlx5_trunk_idx_get(pool, idx);
        if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
            (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
        trunk = pool->trunks[trunk_idx];
        entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
        if (trunk_idx != trunk->idx ||
            rte_bitmap_get(trunk->bmp, entry_idx))
        p = &trunk->data[entry_idx * pool->cfg.size];
        mlx5_ipool_unlock(pool);
        return p;
}
int
mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
{
        struct mlx5_indexed_trunk **trunks = NULL;
        struct mlx5_indexed_cache *gc = pool->gc;
        uint32_t i, n_trunk_valid = 0;

        mlx5_ipool_lock(pool);
        if (pool->cfg.per_core_cache) {
                for (i = 0; i <= RTE_MAX_LCORE; i++) {
                        /*
                         * Free only an outdated global cache still referenced
                         * by a per-lcore cache; the pool's own gc is freed at
                         * the end.
                         */
                        if (pool->cache[i]) {
                                if (pool->cache[i]->lc &&
                                    pool->cache[i]->lc != pool->gc &&
                                    (!(--pool->cache[i]->lc->ref_cnt))
                                        pool->cfg.free(pool->cache[i]->lc);
                                pool->cfg.free(pool->cache[i]);
                        }
                }
                n_trunk_valid = gc->n_trunk_valid;
        } else {
                trunks = pool->trunks;
                n_trunk_valid = pool->n_trunk_valid;
        }
        for (i = 0; i < n_trunk_valid; i++) {
                pool->cfg.free(trunks[i]);
        }
        pool->cfg.free(trunks);
        mlx5_ipool_unlock(pool);
static void
mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
{
        struct mlx5_indexed_cache *gc;
        struct rte_bitmap *ibmp;
        uint32_t bmp_num, mem_size;
        uint32_t i, j;

        if (!pool->cfg.per_core_cache)
                return;
        bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
        mem_size = rte_bitmap_get_memory_footprint(bmp_num);
        pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
                                         RTE_CACHE_LINE_SIZE, rte_socket_id());
        if (!pool->bmp_mem) {
                DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
                return;
        }
        ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
        if (!ibmp) {
                pool->cfg.free(pool->bmp_mem);
                pool->bmp_mem = NULL;
                DRV_LOG(ERR, "Ipool bitmap create failed.\n");
                return;
        }
        /* Clear the indices held in the global cache. */
        for (i = 0; i < gc->len; i++)
                rte_bitmap_clear(ibmp, gc->idx[i] - 1);
        /* Clear the indices held in the per-lcore caches. */
        for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
                struct mlx5_ipool_per_lcore *ilc = pool->cache[i];

                for (j = 0; j < ilc->len; j++)
                        rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
        }
static void *
mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
{
        struct rte_bitmap *ibmp;
        uint64_t slab = 0;
        uint32_t iidx = *pos;

        if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
                pool->cfg.free(pool->bmp_mem);
                pool->bmp_mem = NULL;
        iidx += __builtin_ctzll(slab);
        rte_bitmap_clear(ibmp, iidx);
        return mlx5_ipool_get_cache(pool, iidx);
}

void *
mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
{
        void *entry;
        uint32_t idx = *pos;

        if (pool->cfg.per_core_cache)
                return mlx5_ipool_get_next_cache(pool, pos);
        while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
                entry = mlx5_ipool_get(pool, idx);
void
mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
{
        printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
               pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
               pool->cfg.trunk_size, pool->n_trunk_valid);
        printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
               "available %u free %u\n",
               pool->cfg.type, pool->n_entry, pool->trunk_new,
               pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
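/*
 * Illustrative iteration sketch (not part of the driver): walk the live
 * entries of a pool. It assumes the caller advances *pos between calls;
 * the exact iteration contract is defined next to the API declaration in
 * mlx5_utils.h.
 *
 *      uint32_t pos = 0;
 *      void *entry = mlx5_ipool_get_next(pool, &pos);
 *
 *      while (entry != NULL) {
 *              // ... use 'entry' and 'pos' ...
 *              pos++;
 *              entry = mlx5_ipool_get_next(pool, &pos);
 *      }
 */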
struct mlx5_l3t_tbl *
mlx5_l3t_create(enum mlx5_l3t_type type)
{
        struct mlx5_l3t_tbl *tbl;
        struct mlx5_indexed_pool_config l3t_ip_cfg = {
                .malloc = mlx5_malloc,
        };

        if (type >= MLX5_L3T_TYPE_MAX) {
        tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
        switch (type) {
        case MLX5_L3T_TYPE_WORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
                break;
        case MLX5_L3T_TYPE_DWORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
                break;
        case MLX5_L3T_TYPE_QWORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
                break;
        default:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
                break;
        }
        rte_spinlock_init(&tbl->sl);
        tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
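/*
 * Illustrative usage sketch (not part of the driver): a three-level table
 * storing 32-bit values. The index value below is hypothetical.
 *
 *      union mlx5_l3t_data data;
 *      struct mlx5_l3t_tbl *tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
 *
 *      if (!tbl)
 *              return;
 *      data.dword = 0xcafe;
 *      mlx5_l3t_set_entry(tbl, 42, &data);     // install value, ref_cnt = 1
 *      data.dword = 0;
 *      mlx5_l3t_get_entry(tbl, 42, &data);     // reads it back, ref_cnt = 2
 *      mlx5_l3t_clear_entry(tbl, 42);          // drop both references ...
 *      mlx5_l3t_clear_entry(tbl, 42);          // ... entry data is cleared
 *      mlx5_l3t_destroy(tbl);
 */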
void
mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        uint32_t i, j;

        for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
                m_tbl = g_tbl->tbl[i];
                for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
                        MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
                                    m_tbl->tbl[j])->ref_cnt);
                        mlx5_ipool_free(tbl->eip,
                                        ((struct mlx5_l3t_entry_word *)
                                        m_tbl->tbl[j])->idx);
                        if (!(--m_tbl->ref_cnt))
                MLX5_ASSERT(!m_tbl->ref_cnt);
                mlx5_free(g_tbl->tbl[i]);
                if (!(--g_tbl->ref_cnt))
        MLX5_ASSERT(!g_tbl->ref_cnt);
        mlx5_free(tbl->tbl);
        mlx5_ipool_destroy(tbl->eip);
static int32_t
__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                union mlx5_l3t_data *data)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx;

        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                data->word = w_e_tbl->entry[entry_idx].data;
                if (w_e_tbl->entry[entry_idx].data)
                        w_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                data->dword = dw_e_tbl->entry[entry_idx].data;
                if (dw_e_tbl->entry[entry_idx].data)
                        dw_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                data->qword = qw_e_tbl->entry[entry_idx].data;
                if (qw_e_tbl->entry[entry_idx].data)
                        qw_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        default:
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                data->ptr = ptr_e_tbl->entry[entry_idx].data;
                if (ptr_e_tbl->entry[entry_idx].data)
                        ptr_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        }
int32_t
mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                   union mlx5_l3t_data *data)
{
        int32_t ret;

        rte_spinlock_lock(&tbl->sl);
        ret = __l3t_get_entry(tbl, idx, data);
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}
int32_t
mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx;
        uint32_t ref_cnt;
        int32_t ret;

        rte_spinlock_lock(&tbl->sl);
        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
                ret = --w_e_tbl->entry[entry_idx].ref_cnt;
                w_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --w_e_tbl->ref_cnt;
                break;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
                ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
                dw_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --dw_e_tbl->ref_cnt;
                break;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
                ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
                qw_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --qw_e_tbl->ref_cnt;
                break;
        default:
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
                ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
                ptr_e_tbl->entry[entry_idx].data = NULL;
                ref_cnt = --ptr_e_tbl->ref_cnt;
                break;
        }
        mlx5_ipool_free(tbl->eip,
                        ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
        m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
        if (!(--m_tbl->ref_cnt)) {
                [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
                if (!(--g_tbl->ref_cnt)) {
        rte_spinlock_unlock(&tbl->sl);
static int32_t
__l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                union mlx5_l3t_data *data)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx, tbl_idx = 0;

        /* Check the global table, create it if empty. */
        g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
                            sizeof(struct mlx5_l3t_level_tbl) +
                            sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
        /*
         * Check the middle table, create it if empty. The reference count
         * is increased when a new sub-table is created.
         */
        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
                            sizeof(struct mlx5_l3t_level_tbl) +
                            sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
        g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
        /*
         * Check the entry table, create it if empty. The reference count
         * is increased when a new entry table is created.
         */
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
        ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
        m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                if (w_e_tbl->entry[entry_idx].data) {
                        data->word = w_e_tbl->entry[entry_idx].data;
                        w_e_tbl->entry[entry_idx].ref_cnt++;
                w_e_tbl->entry[entry_idx].data = data->word;
                w_e_tbl->entry[entry_idx].ref_cnt = 1;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                if (dw_e_tbl->entry[entry_idx].data) {
                        data->dword = dw_e_tbl->entry[entry_idx].data;
                        dw_e_tbl->entry[entry_idx].ref_cnt++;
                dw_e_tbl->entry[entry_idx].data = data->dword;
                dw_e_tbl->entry[entry_idx].ref_cnt = 1;
                dw_e_tbl->ref_cnt++;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                if (qw_e_tbl->entry[entry_idx].data) {
                        data->qword = qw_e_tbl->entry[entry_idx].data;
                        qw_e_tbl->entry[entry_idx].ref_cnt++;
                qw_e_tbl->entry[entry_idx].data = data->qword;
                qw_e_tbl->entry[entry_idx].ref_cnt = 1;
                qw_e_tbl->ref_cnt++;
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                if (ptr_e_tbl->entry[entry_idx].data) {
                        data->ptr = ptr_e_tbl->entry[entry_idx].data;
                        ptr_e_tbl->entry[entry_idx].ref_cnt++;
                ptr_e_tbl->entry[entry_idx].data = data->ptr;
                ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
                ptr_e_tbl->ref_cnt++;
int32_t
mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                   union mlx5_l3t_data *data)
{
        int32_t ret;

        rte_spinlock_lock(&tbl->sl);
        ret = __l3t_set_entry(tbl, idx, data);
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}
int32_t
mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                       union mlx5_l3t_data *data,
                       mlx5_l3t_alloc_callback_fn cb, void *ctx)
{
        int32_t ret;

        rte_spinlock_lock(&tbl->sl);
        /* Check if the entry data is ready. */
        ret = __l3t_get_entry(tbl, idx, data);
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
        case MLX5_L3T_TYPE_DWORD:
        case MLX5_L3T_TYPE_QWORD:
        /* Entry data is not ready, use the user callback to create it. */
        ret = cb(ctx, data);
        /* Save the newly allocated data to the entry. */
        ret = __l3t_set_entry(tbl, idx, data);
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}
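/*
 * Illustrative sketch of the prepare flow (not part of the driver): the
 * callback, table pointer, index and helper below are hypothetical.
 * mlx5_l3t_prepare_entry() first looks the entry up; only when it holds no
 * data yet is the callback invoked to allocate it, and the result is stored
 * via __l3t_set_entry(), all under the table spinlock. A zero return is
 * assumed to mean success here.
 *
 *      static int32_t
 *      my_alloc_cb(void *ctx, union mlx5_l3t_data *data)
 *      {
 *              data->ptr = create_my_object(ctx);      // hypothetical helper
 *              return data->ptr ? 0 : -1;
 *      }
 *
 *      union mlx5_l3t_data data;
 *
 *      if (!mlx5_l3t_prepare_entry(ptr_tbl, idx, &data, my_alloc_cb, ctx))
 *              use(data.ptr);                          // hypothetical
 */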