1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
5 #include <rte_malloc.h>
7 #include <mlx5_malloc.h>
9 #include "mlx5_utils.h"
12 /********************* mlx5 list ************************/
15 mlx5_list_create(struct mlx5_list *list, const char *name, void *ctx,
16 mlx5_list_create_cb cb_create,
17 mlx5_list_match_cb cb_match,
18 mlx5_list_remove_cb cb_remove,
19 mlx5_list_clone_cb cb_clone,
20 mlx5_list_clone_free_cb cb_clone_free)
25 if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
29 snprintf(list->name, sizeof(list->name), "%s", name);
31 list->cb_create = cb_create;
32 list->cb_match = cb_match;
33 list->cb_remove = cb_remove;
34 list->cb_clone = cb_clone;
35 list->cb_clone_free = cb_clone_free;
36 rte_rwlock_init(&list->lock);
37 DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
38 for (i = 0; i <= RTE_MAX_LCORE; i++)
39 LIST_INIT(&list->cache[i].h);
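/*
 * Usage sketch (illustrative only, not part of the driver): a user of the
 * mlx5 list embeds struct mlx5_list_entry at the start of its own object
 * and provides the five callbacks. All "example_*" names below are
 * hypothetical; the callback signatures are assumed from mlx5_utils.h, and
 * __list_lookup() below treats a zero return from cb_match() as a match.
 *
 *   struct example_obj {
 *       struct mlx5_list_entry entry; // must be the first member
 *       uint32_t key;
 *   };
 *
 *   static int
 *   example_match_cb(struct mlx5_list *list, struct mlx5_list_entry *entry,
 *                    void *ctx)
 *   {
 *       struct example_obj *obj = (struct example_obj *)entry;
 *
 *       RTE_SET_USED(list);
 *       return obj->key != *(uint32_t *)ctx; // 0 means "matches"
 *   }
 *
 *   static struct mlx5_list_entry *
 *   example_create_cb(struct mlx5_list *list, struct mlx5_list_entry *entry,
 *                     void *ctx)
 *   {
 *       struct example_obj *obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*obj),
 *                                             0, SOCKET_ID_ANY);
 *
 *       RTE_SET_USED(list);
 *       RTE_SET_USED(entry);
 *       if (obj)
 *           obj->key = *(uint32_t *)ctx;
 *       return obj ? &obj->entry : NULL;
 *   }
 *
 *   // The remove/clone/clone_free callbacks free or duplicate the object
 *   // in the same way and are omitted here. With a caller-owned
 *   // "struct mlx5_list list;" the list is then initialized as:
 *   mlx5_list_create(&list, "example", NULL, example_create_cb,
 *                    example_match_cb, example_remove_cb,
 *                    example_clone_cb, example_clone_free_cb);
 */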
43 static struct mlx5_list_entry *
44 __list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
46 struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
49 while (entry != NULL) {
50 struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
52 if (list->cb_match(list, entry, ctx)) {
53 if (lcore_index < RTE_MAX_LCORE) {
54 ret = __atomic_load_n(&entry->ref_cnt,
57 LIST_REMOVE(entry, next);
58 list->cb_clone_free(list, entry);
65 ret = __atomic_add_fetch(&entry->ref_cnt, 1,
68 /* Entry was invalid before, free it. */
69 LIST_REMOVE(entry, next);
70 list->cb_clone_free(list, entry);
74 DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
75 list->name, (void *)entry, entry->ref_cnt);
82 struct mlx5_list_entry *
83 mlx5_list_lookup(struct mlx5_list *list, void *ctx)
85 struct mlx5_list_entry *entry = NULL;
88 rte_rwlock_read_lock(&list->lock);
89 for (i = 0; i < RTE_MAX_LCORE; i++) {
90 entry = __list_lookup(list, i, ctx, false);
94 rte_rwlock_read_unlock(&list->lock);
98 static struct mlx5_list_entry *
99 mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
100 struct mlx5_list_entry *gentry, void *ctx)
102 struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
106 lentry->ref_cnt = 1u;
107 lentry->gentry = gentry;
108 LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
112 struct mlx5_list_entry *
113 mlx5_list_register(struct mlx5_list *list, void *ctx)
115 struct mlx5_list_entry *entry, *lentry;
116 uint32_t prev_gen_cnt = 0;
117 int lcore_index = rte_lcore_index(rte_lcore_id());
120 MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
121 if (unlikely(lcore_index == -1)) {
125 /* Lookup in local cache. */
126 lentry = __list_lookup(list, lcore_index, ctx, true);
129 /* Lookup with read lock, reuse if found. */
130 rte_rwlock_read_lock(&list->lock);
131 entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
133 prev_gen_cnt = __atomic_load_n(&list->gen_cnt,
135 rte_rwlock_read_unlock(&list->lock);
137 rte_rwlock_read_unlock(&list->lock);
138 return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
140 /* Not found, append under the write lock, blocking readers on other threads. */
141 rte_rwlock_write_lock(&list->lock);
142 /* If the list was changed by another thread before the lock was taken, search again. */
143 if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
144 /* Lookup and reuse w/o read lock. */
145 entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
147 rte_rwlock_write_unlock(&list->lock);
148 return mlx5_list_cache_insert(list, lcore_index, entry,
152 entry = list->cb_create(list, entry, ctx);
154 lentry = mlx5_list_cache_insert(list, lcore_index, entry, ctx);
156 list->cb_remove(list, entry);
159 LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry,
161 __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
162 __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
163 DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
164 list->name, (void *)entry, entry->ref_cnt);
168 rte_rwlock_write_unlock(&list->lock);
173 mlx5_list_unregister(struct mlx5_list *list,
174 struct mlx5_list_entry *entry)
176 struct mlx5_list_entry *gentry = entry->gentry;
178 if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
180 if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
182 rte_rwlock_write_lock(&list->lock);
183 if (__atomic_load_n(&gentry->ref_cnt, __ATOMIC_ACQUIRE) == 0) {
184 __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
185 __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
186 LIST_REMOVE(gentry, next);
187 list->cb_remove(list, gentry);
188 rte_rwlock_write_unlock(&list->lock);
189 DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
190 list->name, (void *)gentry);
193 rte_rwlock_write_unlock(&list->lock);
198 mlx5_list_destroy(struct mlx5_list *list)
200 struct mlx5_list_entry *entry;
204 for (i = 0; i <= RTE_MAX_LCORE; i++) {
205 while (!LIST_EMPTY(&list->cache[i].h)) {
206 entry = LIST_FIRST(&list->cache[i].h);
207 LIST_REMOVE(entry, next);
208 if (i == RTE_MAX_LCORE) {
209 list->cb_remove(list, entry);
210 DRV_LOG(DEBUG, "mlx5 list %s entry %p "
211 "destroyed.", list->name,
214 list->cb_clone_free(list, entry);
218 memset(list, 0, sizeof(*list));
222 mlx5_list_get_entry_num(struct mlx5_list *list)
225 return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
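/*
 * Usage sketch (illustrative only): the typical data-path flow for a list
 * set up as in the sketch following mlx5_list_create(). The entry returned
 * by mlx5_list_register() is the per-lcore clone and must be passed back to
 * mlx5_list_unregister() by the same user when the reference is dropped.
 *
 *   uint32_t key = 42;
 *   struct mlx5_list_entry *e = mlx5_list_register(&list, &key);
 *
 *   if (e != NULL) {
 *       // ... use the object that embeds "e" ...
 *       mlx5_list_unregister(&list, e);
 *   }
 */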
228 /********************* Indexed pool **********************/
231 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
233 if (pool->cfg.need_lock)
234 rte_spinlock_lock(&pool->rsz_lock);
238 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
240 if (pool->cfg.need_lock)
241 rte_spinlock_unlock(&pool->rsz_lock);
244 static inline uint32_t
245 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
247 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
248 uint32_t trunk_idx = 0;
251 if (!cfg->grow_trunk)
252 return entry_idx / cfg->trunk_size;
253 if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
254 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
255 (cfg->trunk_size << (cfg->grow_shift *
256 cfg->grow_trunk)) + cfg->grow_trunk;
258 for (i = 0; i < cfg->grow_trunk; i++) {
259 if (entry_idx < pool->grow_tbl[i])
267 static inline uint32_t
268 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
270 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
272 return cfg->trunk_size << (cfg->grow_shift *
273 (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
276 static inline uint32_t
277 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
279 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
284 if (!cfg->grow_trunk)
285 return cfg->trunk_size * trunk_idx;
286 if (trunk_idx < cfg->grow_trunk)
287 offset = pool->grow_tbl[trunk_idx - 1];
289 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
290 (cfg->trunk_size << (cfg->grow_shift *
291 cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
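/*
 * Worked example (hypothetical configuration): with trunk_size = 64,
 * grow_shift = 1 and grow_trunk = 3, mlx5_ipool_create() below builds
 * grow_tbl[] = {64, 192, 448}, the cumulative entry counts after trunks
 * 0..2. The helpers above then yield:
 *
 *   trunk index:                 0    1     2     3     4   ...
 *   mlx5_trunk_size_get():       64   128   256   512   512 ...
 *   mlx5_trunk_idx_offset_get(): 0    64    192   448   960 ...
 *
 * and mlx5_trunk_idx_get() maps the zero-based entry index 200 to trunk 2
 * (192 <= 200 < 448) and entry index 500 to trunk 3 ((500 - 448) / 512 + 3).
 */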
295 struct mlx5_indexed_pool *
296 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
298 struct mlx5_indexed_pool *pool;
301 if (!cfg || (!cfg->malloc ^ !cfg->free) ||
302 (cfg->per_core_cache && cfg->release_mem_en) ||
303 (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
304 ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
306 pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
307 sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
312 if (!pool->cfg.trunk_size)
313 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
314 if (!cfg->malloc && !cfg->free) {
315 pool->cfg.malloc = mlx5_malloc;
316 pool->cfg.free = mlx5_free;
318 if (pool->cfg.need_lock)
319 rte_spinlock_init(&pool->rsz_lock);
321 * Initialize the dynamically growing trunk size lookup table for quick
322 * lookup of the trunk entry index offset.
324 for (i = 0; i < cfg->grow_trunk; i++) {
325 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
327 pool->grow_tbl[i] += pool->grow_tbl[i - 1];
329 if (!pool->cfg.max_idx)
331 mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
332 if (!cfg->per_core_cache)
333 pool->free_list = TRUNK_INVALID;
334 rte_spinlock_init(&pool->lcore_lock);
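/*
 * Usage sketch (illustrative only): a minimal pool without the per-core
 * cache. The item type, sizes and pool name are hypothetical.
 *
 *   struct example_item { uint64_t a; uint64_t b; };
 *
 *   struct mlx5_indexed_pool_config cfg = {
 *       .size = sizeof(struct example_item),
 *       .trunk_size = 64,
 *       .need_lock = 1,
 *       .release_mem_en = 1,
 *       .malloc = mlx5_malloc,
 *       .free = mlx5_free,
 *       .type = "example_ipool",
 *   };
 *   struct mlx5_indexed_pool *pool = mlx5_ipool_create(&cfg);
 *   uint32_t idx;
 *   struct example_item *it = mlx5_ipool_zmalloc(pool, &idx);
 *
 *   if (it != NULL) {
 *       it->a = 1;
 *       // "idx" is a non-zero handle; the pointer can be re-fetched later.
 *       it = mlx5_ipool_get(pool, idx);
 *       mlx5_ipool_free(pool, idx);
 *   }
 *   mlx5_ipool_destroy(pool);
 */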
339 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
341 struct mlx5_indexed_trunk *trunk;
342 struct mlx5_indexed_trunk **trunk_tmp;
343 struct mlx5_indexed_trunk **p;
344 size_t trunk_size = 0;
347 uint32_t idx, cur_max_idx, i;
349 cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
350 if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
351 cur_max_idx >= pool->cfg.max_idx)
353 if (pool->n_trunk_valid == pool->n_trunk) {
354 /* No free trunk slot left, expand the trunk pointer array. */
355 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
356 RTE_CACHE_LINE_SIZE / sizeof(void *);
358 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
359 sizeof(struct mlx5_indexed_trunk *),
360 RTE_CACHE_LINE_SIZE, rte_socket_id());
364 memcpy(p, pool->trunks, pool->n_trunk_valid *
365 sizeof(struct mlx5_indexed_trunk *));
366 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
367 n_grow * sizeof(void *));
368 trunk_tmp = pool->trunks;
371 pool->cfg.free(trunk_tmp);
372 pool->n_trunk += n_grow;
374 if (!pool->cfg.release_mem_en) {
375 idx = pool->n_trunk_valid;
377 /* Find the first available slot in trunk list */
378 for (idx = 0; idx < pool->n_trunk; idx++)
379 if (pool->trunks[idx] == NULL)
382 trunk_size += sizeof(*trunk);
383 data_size = mlx5_trunk_size_get(pool, idx);
384 bmp_size = rte_bitmap_get_memory_footprint(data_size);
385 /* rte_bitmap requires cacheline-aligned memory. */
386 trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
387 trunk_size += bmp_size;
388 trunk = pool->cfg.malloc(0, trunk_size,
389 RTE_CACHE_LINE_SIZE, rte_socket_id());
392 pool->trunks[idx] = trunk;
394 trunk->free = data_size;
395 trunk->prev = TRUNK_INVALID;
396 trunk->next = TRUNK_INVALID;
397 MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
398 pool->free_list = idx;
399 /* Mark all entries as available. */
400 trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
401 [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
403 /* Clear the excess bits in the last trunk so entries beyond max_idx are never allocated. */
404 if (cur_max_idx + data_size > pool->cfg.max_idx) {
405 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
406 rte_bitmap_clear(trunk->bmp, i);
408 MLX5_ASSERT(trunk->bmp);
409 pool->n_trunk_valid++;
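/*
 * Layout note (derived from mlx5_ipool_grow() above): every trunk is one
 * allocation of
 *
 *   sizeof(struct mlx5_indexed_trunk)              // trunk header
 *   + RTE_CACHE_LINE_ROUNDUP(data_size * cfg.size) // entry storage
 *   + rte_bitmap_get_memory_footprint(data_size)   // free-entry bitmap
 *
 * where data_size = mlx5_trunk_size_get(pool, idx). The bitmap is placed
 * right after the rounded-up entry storage and starts with all bits set,
 * i.e. every entry marked as available.
 */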
417 static inline struct mlx5_indexed_cache *
418 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
420 struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
422 lc = pool->cache[cidx]->lc;
423 gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
424 if (gc && lc != gc) {
425 mlx5_ipool_lock(pool);
426 if (lc && !(--lc->ref_cnt))
430 pool->cache[cidx]->lc = lc;
431 mlx5_ipool_unlock(pool);
439 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
441 struct mlx5_indexed_trunk *trunk;
442 struct mlx5_indexed_cache *p, *lc, *olc = NULL;
443 size_t trunk_size = 0;
445 uint32_t cur_max_idx, trunk_idx, trunk_n;
446 uint32_t fetch_size, ts_idx, i;
453 * Fetch a new index from the global cache if possible. On the first
454 * round the local cache will be NULL.
456 lc = pool->cache[cidx]->lc;
457 mlx5_ipool_lock(pool);
458 /* Try to update local cache first. */
459 if (likely(pool->gc)) {
460 if (lc != pool->gc) {
461 if (lc && !(--lc->ref_cnt))
465 pool->cache[cidx]->lc = lc;
468 /* Use the updated local cache to fetch index. */
469 fetch_size = pool->cfg.per_core_cache >> 2;
470 if (lc->len < fetch_size)
471 fetch_size = lc->len;
472 lc->len -= fetch_size;
473 memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
474 sizeof(uint32_t) * fetch_size);
477 mlx5_ipool_unlock(pool);
483 pool->cache[cidx]->len = fetch_size - 1;
484 return pool->cache[cidx]->idx[pool->cache[cidx]->len];
486 trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
487 __ATOMIC_ACQUIRE) : 0;
488 trunk_n = lc ? lc->n_trunk : 0;
489 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
490 /* Check if the index has reached the maximum. */
491 if (trunk_idx == TRUNK_MAX_IDX ||
492 cur_max_idx >= pool->cfg.max_idx)
494 /* Not enough space in the trunk array, resize it. */
495 if (trunk_idx == trunk_n) {
496 n_grow = trunk_idx ? trunk_idx :
497 RTE_CACHE_LINE_SIZE / sizeof(void *);
498 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
499 /* Resize the trunk array. */
500 p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
501 sizeof(struct mlx5_indexed_trunk *)) +
502 (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
503 RTE_CACHE_LINE_SIZE, rte_socket_id());
506 p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
508 memcpy(p->trunks, lc->trunks, trunk_idx *
509 sizeof(struct mlx5_indexed_trunk *));
510 #ifdef RTE_LIBRTE_MLX5_DEBUG
511 memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
512 n_grow * sizeof(void *));
514 p->n_trunk_valid = trunk_idx;
515 p->n_trunk = trunk_n + n_grow;
518 /* Prepare the new trunk. */
519 trunk_size = sizeof(*trunk);
520 data_size = mlx5_trunk_size_get(pool, trunk_idx);
521 trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
522 trunk = pool->cfg.malloc(0, trunk_size,
523 RTE_CACHE_LINE_SIZE, rte_socket_id());
524 if (unlikely(!trunk)) {
528 trunk->idx = trunk_idx;
529 trunk->free = data_size;
530 mlx5_ipool_lock(pool);
532 * Double-check whether the trunks were updated or an index became
533 * available: while the new trunk was being allocated, indices may have
534 * been flushed to the global cache, so pool->gc->len must be checked too.
536 if (pool->gc && (lc != pool->gc ||
537 lc->n_trunk_valid != trunk_idx ||
539 mlx5_ipool_unlock(pool);
542 pool->cfg.free(trunk);
545 /* Resize the trunk array and update local cache first. */
547 if (lc && !(--lc->ref_cnt))
551 pool->cache[cidx]->lc = lc;
552 __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
554 /* Add trunk to trunks array. */
555 lc->trunks[trunk_idx] = trunk;
556 __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
557 /* Enqueue half of the new indices to the global cache. */
558 ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
559 fetch_size = trunk->free >> 1;
560 for (i = 0; i < fetch_size; i++)
561 lc->idx[i] = ts_idx + i;
562 lc->len = fetch_size;
563 mlx5_ipool_unlock(pool);
564 /* Copy the remaining half minus one to the local cache index array. */
565 pool->cache[cidx]->len = trunk->free - fetch_size - 1;
566 ts_idx += fetch_size;
567 for (i = 0; i < pool->cache[cidx]->len; i++)
568 pool->cache[cidx]->idx[i] = ts_idx + i;
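/*
 * Worked example (hypothetical numbers): assume the first trunk allocated
 * here is trunk 0 with 64 entries, so ts_idx starts at 1 (pool indices are
 * 1-based). Then fetch_size = 64 / 2 = 32: indices 1..32 are enqueued to
 * the global cache, indices 33..63 (trunk->free - fetch_size - 1 = 31 of
 * them) fill the local per-core cache, and the one remaining index, 64, is
 * the one handed back to the caller.
 */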
575 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
577 struct mlx5_indexed_trunk *trunk;
578 struct mlx5_indexed_cache *lc;
583 if (unlikely(!pool->cache[cidx])) {
584 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
585 sizeof(struct mlx5_ipool_per_lcore) +
586 (pool->cfg.per_core_cache * sizeof(uint32_t)),
587 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
588 if (!pool->cache[cidx]) {
589 DRV_LOG(ERR, "Ipool cache %d allocation failed.", cidx);
593 lc = mlx5_ipool_update_global_cache(pool, cidx);
595 trunk_idx = mlx5_trunk_idx_get(pool, idx);
596 trunk = lc->trunks[trunk_idx];
598 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
599 return &trunk->data[entry_idx * pool->cfg.size];
603 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
608 cidx = rte_lcore_index(rte_lcore_id());
609 if (unlikely(cidx == -1)) {
610 cidx = RTE_MAX_LCORE;
611 rte_spinlock_lock(&pool->lcore_lock);
613 entry = _mlx5_ipool_get_cache(pool, cidx, idx);
614 if (unlikely(cidx == RTE_MAX_LCORE))
615 rte_spinlock_unlock(&pool->lcore_lock);
621 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
624 if (unlikely(!pool->cache[cidx])) {
625 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
626 sizeof(struct mlx5_ipool_per_lcore) +
627 (pool->cfg.per_core_cache * sizeof(uint32_t)),
628 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
629 if (!pool->cache[cidx]) {
630 DRV_LOG(ERR, "Ipool cache %d allocation failed.", cidx);
633 } else if (pool->cache[cidx]->len) {
634 pool->cache[cidx]->len--;
635 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
636 return _mlx5_ipool_get_cache(pool, cidx, *idx);
638 /* No index left in the local cache, fetch more from the global pool. */
639 *idx = mlx5_ipool_allocate_from_global(pool, cidx);
640 if (unlikely(!(*idx)))
642 return _mlx5_ipool_get_cache(pool, cidx, *idx);
646 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
651 cidx = rte_lcore_index(rte_lcore_id());
652 if (unlikely(cidx == -1)) {
653 cidx = RTE_MAX_LCORE;
654 rte_spinlock_lock(&pool->lcore_lock);
656 entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
657 if (unlikely(cidx == RTE_MAX_LCORE))
658 rte_spinlock_unlock(&pool->lcore_lock);
663 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
665 struct mlx5_ipool_per_lcore *ilc;
666 struct mlx5_indexed_cache *gc, *olc = NULL;
667 uint32_t reclaim_num = 0;
671 * An index may be allocated on core A but freed on core B. In that
672 * case, check whether the local cache on core B has been allocated.
674 if (unlikely(!pool->cache[cidx])) {
675 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
676 sizeof(struct mlx5_ipool_per_lcore) +
677 (pool->cfg.per_core_cache * sizeof(uint32_t)),
678 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
679 if (!pool->cache[cidx]) {
680 DRV_LOG(ERR, "Ipool cache %d allocation failed.", cidx);
684 /* Try to enqueue to local index cache. */
685 if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
686 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
687 pool->cache[cidx]->len++;
690 ilc = pool->cache[cidx];
691 reclaim_num = pool->cfg.per_core_cache >> 2;
692 ilc->len -= reclaim_num;
693 /* The local index cache is full, flush part of it to the global index cache. */
694 mlx5_ipool_lock(pool);
697 if (!(--ilc->lc->ref_cnt))
702 memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
703 reclaim_num * sizeof(uint32_t));
704 gc->len += reclaim_num;
705 mlx5_ipool_unlock(pool);
708 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
709 pool->cache[cidx]->len++;
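/*
 * Worked example (hypothetical numbers): with per_core_cache = 16, freeing
 * an index on a core whose local cache already holds 16 entries reclaims
 * 16 / 4 = 4 of them. Those four indices are copied to the global cache
 * under the pool lock, the local length drops to 12, and the index being
 * freed is then stored locally, leaving 13 cached indices on that core.
 */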
713 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
717 cidx = rte_lcore_index(rte_lcore_id());
718 if (unlikely(cidx == -1)) {
719 cidx = RTE_MAX_LCORE;
720 rte_spinlock_lock(&pool->lcore_lock);
722 _mlx5_ipool_free_cache(pool, cidx, idx);
723 if (unlikely(cidx == RTE_MAX_LCORE))
724 rte_spinlock_unlock(&pool->lcore_lock);
728 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
730 struct mlx5_indexed_trunk *trunk;
735 if (pool->cfg.per_core_cache)
736 return mlx5_ipool_malloc_cache(pool, idx);
737 mlx5_ipool_lock(pool);
738 if (pool->free_list == TRUNK_INVALID) {
739 /* If no trunk is available, grow a new one. */
740 if (mlx5_ipool_grow(pool)) {
741 mlx5_ipool_unlock(pool);
745 MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
746 trunk = pool->trunks[pool->free_list];
747 MLX5_ASSERT(trunk->free);
748 if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
749 mlx5_ipool_unlock(pool);
753 iidx += __builtin_ctzll(slab);
754 MLX5_ASSERT(iidx != UINT32_MAX);
755 MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
756 rte_bitmap_clear(trunk->bmp, iidx);
757 p = &trunk->data[iidx * pool->cfg.size];
759 * The ipool index should grow continuously from small to large:
760 * some features, such as metering, only accept a limited number of
761 * index bits, so a random index with the MSB set may be rejected.
763 iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
764 iidx += 1; /* non-zero index. */
770 /* The trunk is now full, remove it from the free list. */
771 MLX5_ASSERT(pool->free_list == trunk->idx);
772 pool->free_list = trunk->next;
773 if (trunk->next != TRUNK_INVALID)
774 pool->trunks[trunk->next]->prev = TRUNK_INVALID;
775 trunk->prev = TRUNK_INVALID;
776 trunk->next = TRUNK_INVALID;
783 mlx5_ipool_unlock(pool);
788 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
790 void *entry = mlx5_ipool_malloc(pool, idx);
792 if (entry && pool->cfg.size)
793 memset(entry, 0, pool->cfg.size);
798 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
800 struct mlx5_indexed_trunk *trunk;
806 if (pool->cfg.per_core_cache) {
807 mlx5_ipool_free_cache(pool, idx);
811 mlx5_ipool_lock(pool);
812 trunk_idx = mlx5_trunk_idx_get(pool, idx);
813 if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
814 (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
816 trunk = pool->trunks[trunk_idx];
819 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
820 if (trunk_idx != trunk->idx ||
821 rte_bitmap_get(trunk->bmp, entry_idx))
823 rte_bitmap_set(trunk->bmp, entry_idx);
825 if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
826 (pool, trunk->idx)) {
827 if (pool->free_list == trunk->idx)
828 pool->free_list = trunk->next;
829 if (trunk->next != TRUNK_INVALID)
830 pool->trunks[trunk->next]->prev = trunk->prev;
831 if (trunk->prev != TRUNK_INVALID)
832 pool->trunks[trunk->prev]->next = trunk->next;
833 pool->cfg.free(trunk);
834 pool->trunks[trunk_idx] = NULL;
835 pool->n_trunk_valid--;
840 if (pool->n_trunk_valid == 0) {
841 pool->cfg.free(pool->trunks);
845 } else if (trunk->free == 1) {
846 /* Put into free trunk list head. */
847 MLX5_ASSERT(pool->free_list != trunk->idx);
848 trunk->next = pool->free_list;
849 trunk->prev = TRUNK_INVALID;
850 if (pool->free_list != TRUNK_INVALID)
851 pool->trunks[pool->free_list]->prev = trunk->idx;
852 pool->free_list = trunk->idx;
862 mlx5_ipool_unlock(pool);
866 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
868 struct mlx5_indexed_trunk *trunk;
875 if (pool->cfg.per_core_cache)
876 return mlx5_ipool_get_cache(pool, idx);
878 mlx5_ipool_lock(pool);
879 trunk_idx = mlx5_trunk_idx_get(pool, idx);
880 if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
881 (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
883 trunk = pool->trunks[trunk_idx];
886 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
887 if (trunk_idx != trunk->idx ||
888 rte_bitmap_get(trunk->bmp, entry_idx))
890 p = &trunk->data[entry_idx * pool->cfg.size];
892 mlx5_ipool_unlock(pool);
897 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
899 struct mlx5_indexed_trunk **trunks = NULL;
900 struct mlx5_indexed_cache *gc = pool->gc;
901 uint32_t i, n_trunk_valid = 0;
904 mlx5_ipool_lock(pool);
905 if (pool->cfg.per_core_cache) {
906 for (i = 0; i <= RTE_MAX_LCORE; i++) {
908 * Free only the old global cache; the pool's gc is freed afterwards.
911 if (pool->cache[i]) {
912 if (pool->cache[i]->lc &&
913 pool->cache[i]->lc != pool->gc &&
914 (!(--pool->cache[i]->lc->ref_cnt)))
915 pool->cfg.free(pool->cache[i]->lc);
916 pool->cfg.free(pool->cache[i]);
921 n_trunk_valid = gc->n_trunk_valid;
925 trunks = pool->trunks;
926 n_trunk_valid = pool->n_trunk_valid;
928 for (i = 0; i < n_trunk_valid; i++) {
930 pool->cfg.free(trunks[i]);
933 pool->cfg.free(trunks);
936 mlx5_ipool_unlock(pool);
942 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
945 struct mlx5_indexed_cache *gc;
946 struct rte_bitmap *ibmp;
947 uint32_t bmp_num, mem_size;
949 if (!pool->cfg.per_core_cache)
955 bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
956 mem_size = rte_bitmap_get_memory_footprint(bmp_num);
957 pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
958 RTE_CACHE_LINE_SIZE, rte_socket_id());
959 if (!pool->bmp_mem) {
960 DRV_LOG(ERR, "Ipool bitmap memory allocation failed.");
963 ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
965 pool->cfg.free(pool->bmp_mem);
966 pool->bmp_mem = NULL;
967 DRV_LOG(ERR, "Ipool bitmap creation failed.");
971 /* Clear the bits of indices held in the global cache (these entries are free). */
972 for (i = 0; i < gc->len; i++)
973 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
974 /* Clear the bits of indices held in the per-core caches. */
975 for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
976 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
980 for (j = 0; j < ilc->len; j++)
981 rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
986 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
988 struct rte_bitmap *ibmp;
990 uint32_t iidx = *pos;
993 if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
995 pool->cfg.free(pool->bmp_mem);
996 pool->bmp_mem = NULL;
1001 iidx += __builtin_ctzll(slab);
1002 rte_bitmap_clear(ibmp, iidx);
1005 return mlx5_ipool_get_cache(pool, iidx);
1009 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
1011 uint32_t idx = *pos;
1014 if (pool->cfg.per_core_cache)
1015 return mlx5_ipool_get_next_cache(pool, pos);
1016 while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
1017 entry = mlx5_ipool_get(pool, idx);
1028 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
1030 printf("Pool %s entry size %u, trunks %u, %d entries per trunk, "
1032 pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
1033 pool->cfg.trunk_size, pool->n_trunk_valid);
1035 printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
1036 "available %u free %u\n",
1037 pool->cfg.type, pool->n_entry, pool->trunk_new,
1038 pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
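/*
 * Usage sketch (illustrative only): walking all allocated entries. For a
 * pool created with a per-core cache, mlx5_ipool_flush_cache() must run
 * first so that indices sitting in the caches (which are free) are skipped.
 *
 *   uint32_t pos = 0;
 *   void *entry;
 *
 *   if (pool->cfg.per_core_cache)
 *       mlx5_ipool_flush_cache(pool);
 *   while ((entry = mlx5_ipool_get_next(pool, &pos)) != NULL) {
 *       // ... inspect the entry at index "pos" ...
 *       pos++;
 *   }
 */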
1042 struct mlx5_l3t_tbl *
1043 mlx5_l3t_create(enum mlx5_l3t_type type)
1045 struct mlx5_l3t_tbl *tbl;
1046 struct mlx5_indexed_pool_config l3t_ip_cfg = {
1051 .release_mem_en = 1,
1052 .malloc = mlx5_malloc,
1056 if (type >= MLX5_L3T_TYPE_MAX) {
1060 tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
1068 case MLX5_L3T_TYPE_WORD:
1069 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
1070 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
1072 case MLX5_L3T_TYPE_DWORD:
1073 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
1074 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
1076 case MLX5_L3T_TYPE_QWORD:
1077 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
1078 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
1081 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
1082 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
1085 rte_spinlock_init(&tbl->sl);
1086 tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
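/*
 * Usage sketch (illustrative only): storing and looking up a 64-bit value
 * in a QWORD table. The index value is arbitrary; types and limits come
 * from mlx5_utils.h.
 *
 *   struct mlx5_l3t_tbl *tbl = mlx5_l3t_create(MLX5_L3T_TYPE_QWORD);
 *   union mlx5_l3t_data data = { .qword = 0x1234 };
 *
 *   if (tbl != NULL && mlx5_l3t_set_entry(tbl, 7, &data) == 0) {
 *       data.qword = 0;
 *       mlx5_l3t_get_entry(tbl, 7, &data); // takes another reference
 *       mlx5_l3t_clear_entry(tbl, 7);      // drops the get() reference
 *       mlx5_l3t_clear_entry(tbl, 7);      // drops the set() reference
 *   }
 *   mlx5_l3t_destroy(tbl);
 */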
1096 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1098 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1105 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1106 m_tbl = g_tbl->tbl[i];
1109 for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1112 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1113 m_tbl->tbl[j])->ref_cnt);
1114 mlx5_ipool_free(tbl->eip,
1115 ((struct mlx5_l3t_entry_word *)
1116 m_tbl->tbl[j])->idx);
1118 if (!(--m_tbl->ref_cnt))
1121 MLX5_ASSERT(!m_tbl->ref_cnt);
1122 mlx5_free(g_tbl->tbl[i]);
1124 if (!(--g_tbl->ref_cnt))
1127 MLX5_ASSERT(!g_tbl->ref_cnt);
1128 mlx5_free(tbl->tbl);
1131 mlx5_ipool_destroy(tbl->eip);
1136 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1137 union mlx5_l3t_data *data)
1139 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1140 struct mlx5_l3t_entry_word *w_e_tbl;
1141 struct mlx5_l3t_entry_dword *dw_e_tbl;
1142 struct mlx5_l3t_entry_qword *qw_e_tbl;
1143 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1150 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1153 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1156 entry_idx = idx & MLX5_L3T_ET_MASK;
1157 switch (tbl->type) {
1158 case MLX5_L3T_TYPE_WORD:
1159 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1160 data->word = w_e_tbl->entry[entry_idx].data;
1161 if (w_e_tbl->entry[entry_idx].data)
1162 w_e_tbl->entry[entry_idx].ref_cnt++;
1164 case MLX5_L3T_TYPE_DWORD:
1165 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1166 data->dword = dw_e_tbl->entry[entry_idx].data;
1167 if (dw_e_tbl->entry[entry_idx].data)
1168 dw_e_tbl->entry[entry_idx].ref_cnt++;
1170 case MLX5_L3T_TYPE_QWORD:
1171 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1172 data->qword = qw_e_tbl->entry[entry_idx].data;
1173 if (qw_e_tbl->entry[entry_idx].data)
1174 qw_e_tbl->entry[entry_idx].ref_cnt++;
1177 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1178 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1179 if (ptr_e_tbl->entry[entry_idx].data)
1180 ptr_e_tbl->entry[entry_idx].ref_cnt++;
1187 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1188 union mlx5_l3t_data *data)
1192 rte_spinlock_lock(&tbl->sl);
1193 ret = __l3t_get_entry(tbl, idx, data);
1194 rte_spinlock_unlock(&tbl->sl);
1199 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1201 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1202 struct mlx5_l3t_entry_word *w_e_tbl;
1203 struct mlx5_l3t_entry_dword *dw_e_tbl;
1204 struct mlx5_l3t_entry_qword *qw_e_tbl;
1205 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1211 rte_spinlock_lock(&tbl->sl);
1215 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1218 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1221 entry_idx = idx & MLX5_L3T_ET_MASK;
1222 switch (tbl->type) {
1223 case MLX5_L3T_TYPE_WORD:
1224 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1225 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1226 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1229 w_e_tbl->entry[entry_idx].data = 0;
1230 ref_cnt = --w_e_tbl->ref_cnt;
1232 case MLX5_L3T_TYPE_DWORD:
1233 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1234 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1235 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1238 dw_e_tbl->entry[entry_idx].data = 0;
1239 ref_cnt = --dw_e_tbl->ref_cnt;
1241 case MLX5_L3T_TYPE_QWORD:
1242 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1243 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1244 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1247 qw_e_tbl->entry[entry_idx].data = 0;
1248 ref_cnt = --qw_e_tbl->ref_cnt;
1251 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1252 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1253 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1256 ptr_e_tbl->entry[entry_idx].data = NULL;
1257 ref_cnt = --ptr_e_tbl->ref_cnt;
1261 mlx5_ipool_free(tbl->eip,
1262 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1263 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1265 if (!(--m_tbl->ref_cnt)) {
1268 [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1269 if (!(--g_tbl->ref_cnt)) {
1276 rte_spinlock_unlock(&tbl->sl);
1281 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1282 union mlx5_l3t_data *data)
1284 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1285 struct mlx5_l3t_entry_word *w_e_tbl;
1286 struct mlx5_l3t_entry_dword *dw_e_tbl;
1287 struct mlx5_l3t_entry_qword *qw_e_tbl;
1288 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1290 uint32_t entry_idx, tbl_idx = 0;
1292 /* Check the global table, create it if empty. */
1295 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1296 sizeof(struct mlx5_l3t_level_tbl) +
1297 sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1306 * Check the middle table, create it if empty. The ref_cnt will be
1307 * increased if a new sub-table is created.
1309 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1311 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1312 sizeof(struct mlx5_l3t_level_tbl) +
1313 sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1319 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1324 * Check the entry table, create it if empty. The ref_cnt will be
1325 * increased if a new entry table is created.
1327 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1329 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1334 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1335 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1339 entry_idx = idx & MLX5_L3T_ET_MASK;
1340 switch (tbl->type) {
1341 case MLX5_L3T_TYPE_WORD:
1342 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1343 if (w_e_tbl->entry[entry_idx].data) {
1344 data->word = w_e_tbl->entry[entry_idx].data;
1345 w_e_tbl->entry[entry_idx].ref_cnt++;
1349 w_e_tbl->entry[entry_idx].data = data->word;
1350 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1353 case MLX5_L3T_TYPE_DWORD:
1354 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1355 if (dw_e_tbl->entry[entry_idx].data) {
1356 data->dword = dw_e_tbl->entry[entry_idx].data;
1357 dw_e_tbl->entry[entry_idx].ref_cnt++;
1361 dw_e_tbl->entry[entry_idx].data = data->dword;
1362 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1363 dw_e_tbl->ref_cnt++;
1365 case MLX5_L3T_TYPE_QWORD:
1366 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1367 if (qw_e_tbl->entry[entry_idx].data) {
1368 data->qword = qw_e_tbl->entry[entry_idx].data;
1369 qw_e_tbl->entry[entry_idx].ref_cnt++;
1373 qw_e_tbl->entry[entry_idx].data = data->qword;
1374 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1375 qw_e_tbl->ref_cnt++;
1378 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1379 if (ptr_e_tbl->entry[entry_idx].data) {
1380 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1381 ptr_e_tbl->entry[entry_idx].ref_cnt++;
1385 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1386 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1387 ptr_e_tbl->ref_cnt++;
1394 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1395 union mlx5_l3t_data *data)
1399 rte_spinlock_lock(&tbl->sl);
1400 ret = __l3t_set_entry(tbl, idx, data);
1401 rte_spinlock_unlock(&tbl->sl);
1406 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1407 union mlx5_l3t_data *data,
1408 mlx5_l3t_alloc_callback_fn cb, void *ctx)
1412 rte_spinlock_lock(&tbl->sl);
1413 /* Check if entry data is ready. */
1414 ret = __l3t_get_entry(tbl, idx, data);
1416 switch (tbl->type) {
1417 case MLX5_L3T_TYPE_WORD:
1421 case MLX5_L3T_TYPE_DWORD:
1425 case MLX5_L3T_TYPE_QWORD:
1435 /* The entry data is not ready, use the user callback to create it. */
1436 ret = cb(ctx, data);
1439 /* Save the newly allocated data to the entry. */
1440 ret = __l3t_set_entry(tbl, idx, data);
1442 rte_spinlock_unlock(&tbl->sl);
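/*
 * Usage sketch (illustrative only): mlx5_l3t_prepare_entry() combines the
 * lookup above with on-demand creation. The callback name and the stored
 * value are hypothetical; the callback signature is assumed from the
 * mlx5_l3t_alloc_callback_fn typedef, fills "data" and returns 0 on
 * success.
 *
 *   static int32_t
 *   example_alloc_cb(void *ctx, union mlx5_l3t_data *data)
 *   {
 *       RTE_SET_USED(ctx);
 *       data->qword = 0xabcd; // create the resource on first use
 *       return 0;
 *   }
 *
 *   union mlx5_l3t_data data;
 *
 *   if (mlx5_l3t_prepare_entry(tbl, 7, &data, example_alloc_cb, NULL) == 0) {
 *       // data.qword is valid and a reference on entry 7 is held;
 *       // release it later with mlx5_l3t_clear_entry(tbl, 7).
 *   }
 */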