1 /* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
5 #include <rte_malloc.h>
7 #include <mlx5_malloc.h>
9 #include "mlx5_utils.h"
12 /********************* mlx5 list ************************/
15 mlx5_list_create(struct mlx5_list *list, const char *name, void *ctx,
16 mlx5_list_create_cb cb_create,
17 mlx5_list_match_cb cb_match,
18 mlx5_list_remove_cb cb_remove,
19 mlx5_list_clone_cb cb_clone,
20 mlx5_list_clone_free_cb cb_clone_free)
if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
    !cb_clone_free)
	return -EINVAL;
if (name)
29 snprintf(list->name, sizeof(list->name), "%s", name);
31 list->cb_create = cb_create;
32 list->cb_match = cb_match;
33 list->cb_remove = cb_remove;
34 list->cb_clone = cb_clone;
35 list->cb_clone_free = cb_clone_free;
36 rte_rwlock_init(&list->lock);
37 DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
38 for (i = 0; i <= RTE_MAX_LCORE; i++)
39 LIST_INIT(&list->cache[i].h);
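/*
 * Note on the cache layout used below (derived from this file): slots
 * 0..RTE_MAX_LCORE-1 of list->cache[] hold per-lcore lists of cloned
 * entries, while slot RTE_MAX_LCORE holds the shared global list that is
 * protected by list->lock. __list_lookup() scans exactly one of these
 * lists, selected by lcore_index.
 */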
43 static struct mlx5_list_entry *
44 __list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
46 struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
49 while (entry != NULL) {
50 struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
52 if (list->cb_match(list, entry, ctx)) {
53 if (lcore_index < RTE_MAX_LCORE) {
54 ret = __atomic_load_n(&entry->ref_cnt,
57 LIST_REMOVE(entry, next);
58 list->cb_clone_free(list, entry);
65 ret = __atomic_add_fetch(&entry->ref_cnt, 1,
68 /* Entry was invalid before, free it. */
69 LIST_REMOVE(entry, next);
70 list->cb_clone_free(list, entry);
74 DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
75 list->name, (void *)entry, entry->ref_cnt);
82 struct mlx5_list_entry *
83 mlx5_list_lookup(struct mlx5_list *list, void *ctx)
85 struct mlx5_list_entry *entry = NULL;
88 rte_rwlock_read_lock(&list->lock);
89 for (i = 0; i < RTE_MAX_LCORE; i++) {
90 entry = __list_lookup(list, i, ctx, false);
94 rte_rwlock_read_unlock(&list->lock);
98 static struct mlx5_list_entry *
99 mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
100 struct mlx5_list_entry *gentry, void *ctx)
102 struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
104 if (unlikely(!lentry))
106 lentry->ref_cnt = 1u;
107 lentry->gentry = gentry;
108 LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
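/*
 * Usage sketch (illustrative only, no such caller exists in this file):
 * once the five callbacks are implemented, a typical user does
 *
 *	struct mlx5_list_entry *e = mlx5_list_register(list, &criteria);
 *
 *	if (e != NULL) {
 *		... use the entry ...
 *		mlx5_list_unregister(list, e);
 *	}
 *
 * where "criteria" stands for whatever context cb_match()/cb_create()
 * expect. On a worker lcore the returned pointer is the per-lcore clone
 * and its ->gentry field points to the shared global entry.
 */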
112 struct mlx5_list_entry *
113 mlx5_list_register(struct mlx5_list *list, void *ctx)
115 struct mlx5_list_entry *entry, *local_entry;
116 volatile uint32_t prev_gen_cnt = 0;
117 int lcore_index = rte_lcore_index(rte_lcore_id());
120 MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
121 if (unlikely(lcore_index == -1)) {
125 /* 1. Lookup in local cache. */
126 local_entry = __list_lookup(list, lcore_index, ctx, true);
129 /* 2. Lookup with read lock on global list, reuse if found. */
130 rte_rwlock_read_lock(&list->lock);
131 entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
133 rte_rwlock_read_unlock(&list->lock);
134 return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
136 prev_gen_cnt = list->gen_cnt;
137 rte_rwlock_read_unlock(&list->lock);
138 /* 3. Prepare new entry for global list and for cache. */
139 entry = list->cb_create(list, entry, ctx);
140 if (unlikely(!entry))
142 local_entry = list->cb_clone(list, entry, ctx);
143 if (unlikely(!local_entry)) {
144 list->cb_remove(list, entry);
148 local_entry->ref_cnt = 1u;
149 local_entry->gentry = entry;
150 rte_rwlock_write_lock(&list->lock);
151 /* 4. Make sure the same entry was not created before the write lock. */
152 if (unlikely(prev_gen_cnt != list->gen_cnt)) {
153 struct mlx5_list_entry *oentry = __list_lookup(list,
157 if (unlikely(oentry)) {
/* 4.5. Found a real race, reuse the old entry. */
159 rte_rwlock_write_unlock(&list->lock);
160 list->cb_remove(list, entry);
161 list->cb_clone_free(list, local_entry);
162 return mlx5_list_cache_insert(list, lcore_index, oentry,
166 /* 5. Update lists. */
167 LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry, next);
169 rte_rwlock_write_unlock(&list->lock);
170 LIST_INSERT_HEAD(&list->cache[lcore_index].h, local_entry, next);
171 __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
172 DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
173 list->name, (void *)entry, entry->ref_cnt);
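/*
 * Reference handling note: a per-lcore clone created above accounts for
 * one reference on its global entry, so mlx5_list_unregister() first
 * drops the clone reference and only touches the global list (under the
 * write lock) once the global reference count drops to zero.
 */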
178 mlx5_list_unregister(struct mlx5_list *list,
179 struct mlx5_list_entry *entry)
181 struct mlx5_list_entry *gentry = entry->gentry;
183 if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
185 if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
187 rte_rwlock_write_lock(&list->lock);
188 if (likely(gentry->ref_cnt == 0)) {
189 LIST_REMOVE(gentry, next);
190 rte_rwlock_write_unlock(&list->lock);
191 list->cb_remove(list, gentry);
192 __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
193 DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
194 list->name, (void *)gentry);
197 rte_rwlock_write_unlock(&list->lock);
202 mlx5_list_destroy(struct mlx5_list *list)
204 struct mlx5_list_entry *entry;
208 for (i = 0; i <= RTE_MAX_LCORE; i++) {
209 while (!LIST_EMPTY(&list->cache[i].h)) {
210 entry = LIST_FIRST(&list->cache[i].h);
211 LIST_REMOVE(entry, next);
212 if (i == RTE_MAX_LCORE) {
213 list->cb_remove(list, entry);
214 DRV_LOG(DEBUG, "mlx5 list %s entry %p "
215 "destroyed.", list->name,
218 list->cb_clone_free(list, entry);
222 memset(list, 0, sizeof(*list));
226 mlx5_list_get_entry_num(struct mlx5_list *list)
229 return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
232 /********************* Indexed pool **********************/
235 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
237 if (pool->cfg.need_lock)
238 rte_spinlock_lock(&pool->rsz_lock);
242 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
244 if (pool->cfg.need_lock)
245 rte_spinlock_unlock(&pool->rsz_lock);
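/*
 * Worked example for the trunk helpers below (values are illustrative
 * only): with trunk_size = 64, grow_shift = 1 and grow_trunk = 3, the
 * trunks hold 64, 128, 256, 512, 512, ... entries, grow_tbl[] becomes
 * {64, 192, 448}, and the entry offset of trunk 4 is 448 + 512 = 960.
 */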
248 static inline uint32_t
249 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
251 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
252 uint32_t trunk_idx = 0;
255 if (!cfg->grow_trunk)
256 return entry_idx / cfg->trunk_size;
257 if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
258 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
259 (cfg->trunk_size << (cfg->grow_shift *
260 cfg->grow_trunk)) + cfg->grow_trunk;
262 for (i = 0; i < cfg->grow_trunk; i++) {
263 if (entry_idx < pool->grow_tbl[i])
271 static inline uint32_t
272 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
274 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
276 return cfg->trunk_size << (cfg->grow_shift *
277 (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
280 static inline uint32_t
281 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
283 struct mlx5_indexed_pool_config *cfg = &pool->cfg;
288 if (!cfg->grow_trunk)
289 return cfg->trunk_size * trunk_idx;
290 if (trunk_idx < cfg->grow_trunk)
291 offset = pool->grow_tbl[trunk_idx - 1];
293 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
294 (cfg->trunk_size << (cfg->grow_shift *
295 cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
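/*
 * Configuration sketch (illustrative values only, "struct my_obj" is a
 * hypothetical entry type):
 *
 *	struct mlx5_indexed_pool_config cfg = {
 *		.size = sizeof(struct my_obj),
 *		.trunk_size = 64,
 *		.grow_trunk = 3,
 *		.grow_shift = 2,
 *		.need_lock = 1,
 *		.release_mem_en = 1,
 *		.type = "my_obj_ipool",
 *	};
 *	struct mlx5_indexed_pool *pool = mlx5_ipool_create(&cfg);
 *
 * Note the constraints checked below: trunk_size must be a power of two
 * and per_core_cache cannot be combined with release_mem_en.
 */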
299 struct mlx5_indexed_pool *
300 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
302 struct mlx5_indexed_pool *pool;
305 if (!cfg || (!cfg->malloc ^ !cfg->free) ||
306 (cfg->per_core_cache && cfg->release_mem_en) ||
307 (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
308 ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
310 pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
311 sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
316 if (!pool->cfg.trunk_size)
317 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
318 if (!cfg->malloc && !cfg->free) {
319 pool->cfg.malloc = mlx5_malloc;
320 pool->cfg.free = mlx5_free;
322 if (pool->cfg.need_lock)
323 rte_spinlock_init(&pool->rsz_lock);
/*
 * Initialize the dynamic grow trunk size lookup table to have a quick
 * lookup for the trunk entry index offset.
 */
328 for (i = 0; i < cfg->grow_trunk; i++) {
329 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
331 pool->grow_tbl[i] += pool->grow_tbl[i - 1];
333 if (!pool->cfg.max_idx)
335 mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
336 if (!cfg->per_core_cache)
337 pool->free_list = TRUNK_INVALID;
338 rte_spinlock_init(&pool->lcore_lock);
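/*
 * Trunk memory layout used by mlx5_ipool_grow() below: a single
 * allocation holds the trunk header, then the entry data area (rounded
 * up to a cache line), then an rte_bitmap in which a set bit means the
 * entry is free. The bitmap starts with all bits set and the bits
 * beyond cfg.max_idx are cleared so they can never be handed out.
 */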
343 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
345 struct mlx5_indexed_trunk *trunk;
346 struct mlx5_indexed_trunk **trunk_tmp;
347 struct mlx5_indexed_trunk **p;
348 size_t trunk_size = 0;
351 uint32_t idx, cur_max_idx, i;
353 cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
354 if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
355 cur_max_idx >= pool->cfg.max_idx)
357 if (pool->n_trunk_valid == pool->n_trunk) {
/* No free trunk slot left, expand the trunk list. */
359 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
360 RTE_CACHE_LINE_SIZE / sizeof(void *);
362 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
363 sizeof(struct mlx5_indexed_trunk *),
364 RTE_CACHE_LINE_SIZE, rte_socket_id());
368 memcpy(p, pool->trunks, pool->n_trunk_valid *
369 sizeof(struct mlx5_indexed_trunk *));
370 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
371 n_grow * sizeof(void *));
372 trunk_tmp = pool->trunks;
375 pool->cfg.free(trunk_tmp);
376 pool->n_trunk += n_grow;
378 if (!pool->cfg.release_mem_en) {
379 idx = pool->n_trunk_valid;
/* Find the first available slot in the trunk list. */
382 for (idx = 0; idx < pool->n_trunk; idx++)
383 if (pool->trunks[idx] == NULL)
386 trunk_size += sizeof(*trunk);
387 data_size = mlx5_trunk_size_get(pool, idx);
388 bmp_size = rte_bitmap_get_memory_footprint(data_size);
/* rte_bitmap requires the memory to be cache-line aligned. */
390 trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
391 trunk_size += bmp_size;
392 trunk = pool->cfg.malloc(0, trunk_size,
393 RTE_CACHE_LINE_SIZE, rte_socket_id());
396 pool->trunks[idx] = trunk;
398 trunk->free = data_size;
399 trunk->prev = TRUNK_INVALID;
400 trunk->next = TRUNK_INVALID;
401 MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
402 pool->free_list = idx;
403 /* Mark all entries as available. */
404 trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
405 [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
/* Clear any overhead bits at the end of the trunk. */
408 if (cur_max_idx + data_size > pool->cfg.max_idx) {
409 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
410 rte_bitmap_clear(trunk->bmp, i);
412 MLX5_ASSERT(trunk->bmp);
413 pool->n_trunk_valid++;
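/*
 * Per-core cache scheme used by the functions below: each lcore owns a
 * small array of free indices (pool->cache[cidx]) plus a reference to
 * the current global cache object (pool->gc). The global cache object
 * is replaced as a whole when the trunk array grows; reference counting
 * lets lcores that still point at an old object release it lazily in
 * mlx5_ipool_update_global_cache().
 */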
421 static inline struct mlx5_indexed_cache *
422 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
424 struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
426 lc = pool->cache[cidx]->lc;
427 gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
428 if (gc && lc != gc) {
429 mlx5_ipool_lock(pool);
430 if (lc && !(--lc->ref_cnt))
434 pool->cache[cidx]->lc = lc;
435 mlx5_ipool_unlock(pool);
443 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
445 struct mlx5_indexed_trunk *trunk;
446 struct mlx5_indexed_cache *p, *lc, *olc = NULL;
447 size_t trunk_size = 0;
449 uint32_t cur_max_idx, trunk_idx, trunk_n;
450 uint32_t fetch_size, ts_idx, i;
/*
 * Fetch a new index from the global cache if possible. On the first
 * round the local cache will be NULL.
 */
460 lc = pool->cache[cidx]->lc;
461 mlx5_ipool_lock(pool);
462 /* Try to update local cache first. */
463 if (likely(pool->gc)) {
464 if (lc != pool->gc) {
465 if (lc && !(--lc->ref_cnt))
469 pool->cache[cidx]->lc = lc;
472 /* Use the updated local cache to fetch index. */
473 fetch_size = pool->cfg.per_core_cache >> 2;
474 if (lc->len < fetch_size)
475 fetch_size = lc->len;
476 lc->len -= fetch_size;
477 memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
478 sizeof(uint32_t) * fetch_size);
481 mlx5_ipool_unlock(pool);
487 pool->cache[cidx]->len = fetch_size - 1;
488 return pool->cache[cidx]->idx[pool->cache[cidx]->len];
490 trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
491 __ATOMIC_ACQUIRE) : 0;
492 trunk_n = lc ? lc->n_trunk : 0;
493 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
/* Check if the index has reached the maximum. */
495 if (trunk_idx == TRUNK_MAX_IDX ||
496 cur_max_idx >= pool->cfg.max_idx)
/* Not enough space in the trunk array, resize it. */
499 if (trunk_idx == trunk_n) {
500 n_grow = trunk_idx ? trunk_idx :
501 RTE_CACHE_LINE_SIZE / sizeof(void *);
502 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
503 /* Resize the trunk array. */
504 p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
505 sizeof(struct mlx5_indexed_trunk *)) +
506 (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
507 RTE_CACHE_LINE_SIZE, rte_socket_id());
510 p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
512 memcpy(p->trunks, lc->trunks, trunk_idx *
513 sizeof(struct mlx5_indexed_trunk *));
514 #ifdef RTE_LIBRTE_MLX5_DEBUG
515 memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
516 n_grow * sizeof(void *));
518 p->n_trunk_valid = trunk_idx;
519 p->n_trunk = trunk_n + n_grow;
522 /* Prepare the new trunk. */
523 trunk_size = sizeof(*trunk);
524 data_size = mlx5_trunk_size_get(pool, trunk_idx);
525 trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
526 trunk = pool->cfg.malloc(0, trunk_size,
527 RTE_CACHE_LINE_SIZE, rte_socket_id());
528 if (unlikely(!trunk)) {
532 trunk->idx = trunk_idx;
533 trunk->free = data_size;
534 mlx5_ipool_lock(pool);
/*
 * Double check whether the trunk array has been updated or indices
 * have become available. While the new trunk was being allocated,
 * indices may have been flushed to the global cache, so
 * pool->gc->len needs to be checked as well.
 */
540 if (pool->gc && (lc != pool->gc ||
541 lc->n_trunk_valid != trunk_idx ||
543 mlx5_ipool_unlock(pool);
546 pool->cfg.free(trunk);
549 /* Resize the trunk array and update local cache first. */
551 if (lc && !(--lc->ref_cnt))
555 pool->cache[cidx]->lc = lc;
556 __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
558 /* Add trunk to trunks array. */
559 lc->trunks[trunk_idx] = trunk;
560 __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
/* Enqueue half of the indices to the global cache. */
562 ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
563 fetch_size = trunk->free >> 1;
564 for (i = 0; i < fetch_size; i++)
565 lc->idx[i] = ts_idx + i;
566 lc->len = fetch_size;
567 mlx5_ipool_unlock(pool);
/* Copy the remaining half minus one to the local cache index array. */
569 pool->cache[cidx]->len = trunk->free - fetch_size - 1;
570 ts_idx += fetch_size;
571 for (i = 0; i < pool->cache[cidx]->len; i++)
572 pool->cache[cidx]->idx[i] = ts_idx + i;
579 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
581 struct mlx5_indexed_trunk *trunk;
582 struct mlx5_indexed_cache *lc;
587 if (unlikely(!pool->cache[cidx])) {
588 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
589 sizeof(struct mlx5_ipool_per_lcore) +
590 (pool->cfg.per_core_cache * sizeof(uint32_t)),
591 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
592 if (!pool->cache[cidx]) {
593 DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
597 lc = mlx5_ipool_update_global_cache(pool, cidx);
599 trunk_idx = mlx5_trunk_idx_get(pool, idx);
600 trunk = lc->trunks[trunk_idx];
602 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
603 return &trunk->data[entry_idx * pool->cfg.size];
607 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
612 cidx = rte_lcore_index(rte_lcore_id());
613 if (unlikely(cidx == -1)) {
614 cidx = RTE_MAX_LCORE;
615 rte_spinlock_lock(&pool->lcore_lock);
617 entry = _mlx5_ipool_get_cache(pool, cidx, idx);
618 if (unlikely(cidx == RTE_MAX_LCORE))
619 rte_spinlock_unlock(&pool->lcore_lock);
625 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
628 if (unlikely(!pool->cache[cidx])) {
629 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
630 sizeof(struct mlx5_ipool_per_lcore) +
631 (pool->cfg.per_core_cache * sizeof(uint32_t)),
632 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
633 if (!pool->cache[cidx]) {
634 DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
637 } else if (pool->cache[cidx]->len) {
638 pool->cache[cidx]->len--;
639 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
640 return _mlx5_ipool_get_cache(pool, cidx, *idx);
/* The local cache is empty, fetch more indices from the global pool. */
643 *idx = mlx5_ipool_allocate_from_global(pool, cidx);
644 if (unlikely(!(*idx)))
646 return _mlx5_ipool_get_cache(pool, cidx, *idx);
650 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
655 cidx = rte_lcore_index(rte_lcore_id());
656 if (unlikely(cidx == -1)) {
657 cidx = RTE_MAX_LCORE;
658 rte_spinlock_lock(&pool->lcore_lock);
660 entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
661 if (unlikely(cidx == RTE_MAX_LCORE))
662 rte_spinlock_unlock(&pool->lcore_lock);
667 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
669 struct mlx5_ipool_per_lcore *ilc;
670 struct mlx5_indexed_cache *gc, *olc = NULL;
671 uint32_t reclaim_num = 0;
/*
 * An index may be allocated on core A but freed on core B. In that
 * case, check whether the local cache on core B was allocated before.
 */
678 if (unlikely(!pool->cache[cidx])) {
679 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
680 sizeof(struct mlx5_ipool_per_lcore) +
681 (pool->cfg.per_core_cache * sizeof(uint32_t)),
682 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
683 if (!pool->cache[cidx]) {
684 DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
688 /* Try to enqueue to local index cache. */
689 if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
690 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
691 pool->cache[cidx]->len++;
694 ilc = pool->cache[cidx];
695 reclaim_num = pool->cfg.per_core_cache >> 2;
696 ilc->len -= reclaim_num;
697 /* Local index cache full, try with global index cache. */
698 mlx5_ipool_lock(pool);
701 if (!(--ilc->lc->ref_cnt))
706 memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
707 reclaim_num * sizeof(uint32_t));
708 gc->len += reclaim_num;
709 mlx5_ipool_unlock(pool);
712 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
713 pool->cache[cidx]->len++;
717 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
721 cidx = rte_lcore_index(rte_lcore_id());
722 if (unlikely(cidx == -1)) {
723 cidx = RTE_MAX_LCORE;
724 rte_spinlock_lock(&pool->lcore_lock);
726 _mlx5_ipool_free_cache(pool, cidx, idx);
727 if (unlikely(cidx == RTE_MAX_LCORE))
728 rte_spinlock_unlock(&pool->lcore_lock);
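/*
 * Allocation usage sketch (illustrative only, "struct my_obj" is a
 * hypothetical entry type):
 *
 *	uint32_t idx;
 *	struct my_obj *obj = mlx5_ipool_zmalloc(pool, &idx);
 *
 *	if (obj != NULL) {
 *		... obj is also reachable via mlx5_ipool_get(pool, idx) ...
 *		mlx5_ipool_free(pool, idx);
 *	}
 *
 * Returned indices are 1-based so that 0 can serve as a "no entry"
 * marker.
 */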
732 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
734 struct mlx5_indexed_trunk *trunk;
739 if (pool->cfg.per_core_cache)
740 return mlx5_ipool_malloc_cache(pool, idx);
741 mlx5_ipool_lock(pool);
742 if (pool->free_list == TRUNK_INVALID) {
/* If no trunk is available, grow a new one. */
744 if (mlx5_ipool_grow(pool)) {
745 mlx5_ipool_unlock(pool);
749 MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
750 trunk = pool->trunks[pool->free_list];
751 MLX5_ASSERT(trunk->free);
752 if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
753 mlx5_ipool_unlock(pool);
757 iidx += __builtin_ctzll(slab);
758 MLX5_ASSERT(iidx != UINT32_MAX);
759 MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
760 rte_bitmap_clear(trunk->bmp, iidx);
761 p = &trunk->data[iidx * pool->cfg.size];
/*
 * The ipool index should grow continuously from small to big; some
 * features, such as metering, only accept a limited number of index
 * bits, so a random index with the MSB set may be rejected.
 */
767 iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
768 iidx += 1; /* non-zero index. */
/* The trunk is full now, remove it from the free trunk list. */
775 MLX5_ASSERT(pool->free_list == trunk->idx);
776 pool->free_list = trunk->next;
777 if (trunk->next != TRUNK_INVALID)
778 pool->trunks[trunk->next]->prev = TRUNK_INVALID;
779 trunk->prev = TRUNK_INVALID;
780 trunk->next = TRUNK_INVALID;
787 mlx5_ipool_unlock(pool);
792 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
794 void *entry = mlx5_ipool_malloc(pool, idx);
796 if (entry && pool->cfg.size)
797 memset(entry, 0, pool->cfg.size);
802 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
804 struct mlx5_indexed_trunk *trunk;
810 if (pool->cfg.per_core_cache) {
811 mlx5_ipool_free_cache(pool, idx);
815 mlx5_ipool_lock(pool);
816 trunk_idx = mlx5_trunk_idx_get(pool, idx);
817 if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
818 (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
820 trunk = pool->trunks[trunk_idx];
823 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
824 if (trunk_idx != trunk->idx ||
825 rte_bitmap_get(trunk->bmp, entry_idx))
827 rte_bitmap_set(trunk->bmp, entry_idx);
829 if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
830 (pool, trunk->idx)) {
831 if (pool->free_list == trunk->idx)
832 pool->free_list = trunk->next;
833 if (trunk->next != TRUNK_INVALID)
834 pool->trunks[trunk->next]->prev = trunk->prev;
835 if (trunk->prev != TRUNK_INVALID)
836 pool->trunks[trunk->prev]->next = trunk->next;
837 pool->cfg.free(trunk);
838 pool->trunks[trunk_idx] = NULL;
839 pool->n_trunk_valid--;
844 if (pool->n_trunk_valid == 0) {
845 pool->cfg.free(pool->trunks);
849 } else if (trunk->free == 1) {
850 /* Put into free trunk list head. */
851 MLX5_ASSERT(pool->free_list != trunk->idx);
852 trunk->next = pool->free_list;
853 trunk->prev = TRUNK_INVALID;
854 if (pool->free_list != TRUNK_INVALID)
855 pool->trunks[pool->free_list]->prev = trunk->idx;
856 pool->free_list = trunk->idx;
866 mlx5_ipool_unlock(pool);
870 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
872 struct mlx5_indexed_trunk *trunk;
879 if (pool->cfg.per_core_cache)
880 return mlx5_ipool_get_cache(pool, idx);
882 mlx5_ipool_lock(pool);
883 trunk_idx = mlx5_trunk_idx_get(pool, idx);
884 if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
885 (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
887 trunk = pool->trunks[trunk_idx];
890 entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
891 if (trunk_idx != trunk->idx ||
892 rte_bitmap_get(trunk->bmp, entry_idx))
894 p = &trunk->data[entry_idx * pool->cfg.size];
896 mlx5_ipool_unlock(pool);
901 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
903 struct mlx5_indexed_trunk **trunks = NULL;
904 struct mlx5_indexed_cache *gc = pool->gc;
905 uint32_t i, n_trunk_valid = 0;
908 mlx5_ipool_lock(pool);
909 if (pool->cfg.per_core_cache) {
910 for (i = 0; i <= RTE_MAX_LCORE; i++) {
/*
 * Free only the old global cache objects here; the pool gc itself
 * is freed last.
 */
915 if (pool->cache[i]) {
916 if (pool->cache[i]->lc &&
917 pool->cache[i]->lc != pool->gc &&
918 (!(--pool->cache[i]->lc->ref_cnt)))
919 pool->cfg.free(pool->cache[i]->lc);
920 pool->cfg.free(pool->cache[i]);
925 n_trunk_valid = gc->n_trunk_valid;
929 trunks = pool->trunks;
930 n_trunk_valid = pool->n_trunk_valid;
932 for (i = 0; i < n_trunk_valid; i++) {
934 pool->cfg.free(trunks[i]);
937 pool->cfg.free(trunks);
940 mlx5_ipool_unlock(pool);
946 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
949 struct mlx5_indexed_cache *gc;
950 struct rte_bitmap *ibmp;
951 uint32_t bmp_num, mem_size;
953 if (!pool->cfg.per_core_cache)
959 bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
960 mem_size = rte_bitmap_get_memory_footprint(bmp_num);
961 pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
962 RTE_CACHE_LINE_SIZE, rte_socket_id());
963 if (!pool->bmp_mem) {
964 DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
967 ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
969 pool->cfg.free(pool->bmp_mem);
970 pool->bmp_mem = NULL;
971 DRV_LOG(ERR, "Ipool bitmap create failed.\n");
975 /* Clear global cache. */
976 for (i = 0; i < gc->len; i++)
977 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
978 /* Clear core cache. */
979 for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
980 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
984 for (j = 0; j < ilc->len; j++)
985 rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
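/*
 * Iteration note: for pools created with per_core_cache the caller is
 * expected to invoke mlx5_ipool_flush_cache() first. The flush builds
 * the bitmap consumed by mlx5_ipool_get_next_cache() below, in which a
 * set bit marks an index that is really allocated, i.e. not sitting in
 * the global or per-lcore free caches.
 */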
990 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
992 struct rte_bitmap *ibmp;
994 uint32_t iidx = *pos;
997 if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
999 pool->cfg.free(pool->bmp_mem);
1000 pool->bmp_mem = NULL;
1005 iidx += __builtin_ctzll(slab);
1006 rte_bitmap_clear(ibmp, iidx);
1009 return mlx5_ipool_get_cache(pool, iidx);
1013 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
1015 uint32_t idx = *pos;
1018 if (pool->cfg.per_core_cache)
1019 return mlx5_ipool_get_next_cache(pool, pos);
1020 while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
1021 entry = mlx5_ipool_get(pool, idx);
1032 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
1034 printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
1036 pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
1037 pool->cfg.trunk_size, pool->n_trunk_valid);
1039 printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
1040 "available %u free %u\n",
1041 pool->cfg.type, pool->n_entry, pool->trunk_new,
1042 pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
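/*
 * The three level table (L3T) below resolves a 32-bit index in three
 * steps: a global table of middle tables, middle tables of entry
 * tables, and entry tables whose slots hold word/dword/qword/pointer
 * data with a per-slot reference count. Entry tables are allocated from
 * an indexed pool so they can be recycled. Usage sketch (illustrative
 * only):
 *
 *	union mlx5_l3t_data data = { .dword = value };
 *
 *	tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
 *	mlx5_l3t_set_entry(tbl, idx, &data);
 *	mlx5_l3t_get_entry(tbl, idx, &data);
 *	mlx5_l3t_clear_entry(tbl, idx);
 *	mlx5_l3t_destroy(tbl);
 */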
1046 struct mlx5_l3t_tbl *
1047 mlx5_l3t_create(enum mlx5_l3t_type type)
1049 struct mlx5_l3t_tbl *tbl;
1050 struct mlx5_indexed_pool_config l3t_ip_cfg = {
1055 .release_mem_en = 1,
1056 .malloc = mlx5_malloc,
1060 if (type >= MLX5_L3T_TYPE_MAX) {
1064 tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
1072 case MLX5_L3T_TYPE_WORD:
1073 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
1074 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
1076 case MLX5_L3T_TYPE_DWORD:
1077 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
1078 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
1080 case MLX5_L3T_TYPE_QWORD:
1081 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
1082 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
1085 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
1086 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
1089 rte_spinlock_init(&tbl->sl);
1090 tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
1100 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1102 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1109 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1110 m_tbl = g_tbl->tbl[i];
1113 for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1116 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1117 m_tbl->tbl[j])->ref_cnt);
1118 mlx5_ipool_free(tbl->eip,
1119 ((struct mlx5_l3t_entry_word *)
1120 m_tbl->tbl[j])->idx);
1122 if (!(--m_tbl->ref_cnt))
1125 MLX5_ASSERT(!m_tbl->ref_cnt);
1126 mlx5_free(g_tbl->tbl[i]);
1128 if (!(--g_tbl->ref_cnt))
1131 MLX5_ASSERT(!g_tbl->ref_cnt);
1132 mlx5_free(tbl->tbl);
1135 mlx5_ipool_destroy(tbl->eip);
1140 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1141 union mlx5_l3t_data *data)
1143 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1144 struct mlx5_l3t_entry_word *w_e_tbl;
1145 struct mlx5_l3t_entry_dword *dw_e_tbl;
1146 struct mlx5_l3t_entry_qword *qw_e_tbl;
1147 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1154 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1157 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1160 entry_idx = idx & MLX5_L3T_ET_MASK;
1161 switch (tbl->type) {
1162 case MLX5_L3T_TYPE_WORD:
1163 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1164 data->word = w_e_tbl->entry[entry_idx].data;
1165 if (w_e_tbl->entry[entry_idx].data)
1166 w_e_tbl->entry[entry_idx].ref_cnt++;
1168 case MLX5_L3T_TYPE_DWORD:
1169 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1170 data->dword = dw_e_tbl->entry[entry_idx].data;
1171 if (dw_e_tbl->entry[entry_idx].data)
1172 dw_e_tbl->entry[entry_idx].ref_cnt++;
1174 case MLX5_L3T_TYPE_QWORD:
1175 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1176 data->qword = qw_e_tbl->entry[entry_idx].data;
1177 if (qw_e_tbl->entry[entry_idx].data)
1178 qw_e_tbl->entry[entry_idx].ref_cnt++;
1181 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1182 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1183 if (ptr_e_tbl->entry[entry_idx].data)
1184 ptr_e_tbl->entry[entry_idx].ref_cnt++;
1191 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1192 union mlx5_l3t_data *data)
1196 rte_spinlock_lock(&tbl->sl);
1197 ret = __l3t_get_entry(tbl, idx, data);
1198 rte_spinlock_unlock(&tbl->sl);
1203 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1205 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1206 struct mlx5_l3t_entry_word *w_e_tbl;
1207 struct mlx5_l3t_entry_dword *dw_e_tbl;
1208 struct mlx5_l3t_entry_qword *qw_e_tbl;
1209 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1215 rte_spinlock_lock(&tbl->sl);
1219 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1222 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1225 entry_idx = idx & MLX5_L3T_ET_MASK;
1226 switch (tbl->type) {
1227 case MLX5_L3T_TYPE_WORD:
1228 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1229 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1230 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1233 w_e_tbl->entry[entry_idx].data = 0;
1234 ref_cnt = --w_e_tbl->ref_cnt;
1236 case MLX5_L3T_TYPE_DWORD:
1237 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1238 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1239 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1242 dw_e_tbl->entry[entry_idx].data = 0;
1243 ref_cnt = --dw_e_tbl->ref_cnt;
1245 case MLX5_L3T_TYPE_QWORD:
1246 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1247 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1248 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1251 qw_e_tbl->entry[entry_idx].data = 0;
1252 ref_cnt = --qw_e_tbl->ref_cnt;
1255 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1256 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1257 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1260 ptr_e_tbl->entry[entry_idx].data = NULL;
1261 ref_cnt = --ptr_e_tbl->ref_cnt;
1265 mlx5_ipool_free(tbl->eip,
1266 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1267 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1269 if (!(--m_tbl->ref_cnt)) {
1272 [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1273 if (!(--g_tbl->ref_cnt)) {
1280 rte_spinlock_unlock(&tbl->sl);
1285 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1286 union mlx5_l3t_data *data)
1288 struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1289 struct mlx5_l3t_entry_word *w_e_tbl;
1290 struct mlx5_l3t_entry_dword *dw_e_tbl;
1291 struct mlx5_l3t_entry_qword *qw_e_tbl;
1292 struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1294 uint32_t entry_idx, tbl_idx = 0;
1296 /* Check the global table, create it if empty. */
1299 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1300 sizeof(struct mlx5_l3t_level_tbl) +
1301 sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
/*
 * Check the middle table, create it if empty. The reference count
 * is increased if a new sub table is created.
 */
1313 m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1315 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1316 sizeof(struct mlx5_l3t_level_tbl) +
1317 sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1323 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
/*
 * Check the entry table, create it if empty. The reference count
 * is increased if a new entry table is created.
 */
1331 e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1333 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1338 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1339 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1343 entry_idx = idx & MLX5_L3T_ET_MASK;
1344 switch (tbl->type) {
1345 case MLX5_L3T_TYPE_WORD:
1346 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1347 if (w_e_tbl->entry[entry_idx].data) {
1348 data->word = w_e_tbl->entry[entry_idx].data;
1349 w_e_tbl->entry[entry_idx].ref_cnt++;
1353 w_e_tbl->entry[entry_idx].data = data->word;
1354 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1357 case MLX5_L3T_TYPE_DWORD:
1358 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1359 if (dw_e_tbl->entry[entry_idx].data) {
1360 data->dword = dw_e_tbl->entry[entry_idx].data;
1361 dw_e_tbl->entry[entry_idx].ref_cnt++;
1365 dw_e_tbl->entry[entry_idx].data = data->dword;
1366 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1367 dw_e_tbl->ref_cnt++;
1369 case MLX5_L3T_TYPE_QWORD:
1370 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1371 if (qw_e_tbl->entry[entry_idx].data) {
1372 data->qword = qw_e_tbl->entry[entry_idx].data;
1373 qw_e_tbl->entry[entry_idx].ref_cnt++;
1377 qw_e_tbl->entry[entry_idx].data = data->qword;
1378 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1379 qw_e_tbl->ref_cnt++;
1382 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1383 if (ptr_e_tbl->entry[entry_idx].data) {
1384 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1385 ptr_e_tbl->entry[entry_idx].ref_cnt++;
1389 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1390 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1391 ptr_e_tbl->ref_cnt++;
1398 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1399 union mlx5_l3t_data *data)
1403 rte_spinlock_lock(&tbl->sl);
1404 ret = __l3t_set_entry(tbl, idx, data);
1405 rte_spinlock_unlock(&tbl->sl);
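/*
 * mlx5_l3t_prepare_entry() below is a get-or-create helper: it looks
 * the entry up first and, if no data is present yet, calls the user
 * supplied callback to allocate the data and stores the result, all
 * under the same table spinlock.
 */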
1410 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1411 union mlx5_l3t_data *data,
1412 mlx5_l3t_alloc_callback_fn cb, void *ctx)
1416 rte_spinlock_lock(&tbl->sl);
1417 /* Check if entry data is ready. */
1418 ret = __l3t_get_entry(tbl, idx, data);
1420 switch (tbl->type) {
1421 case MLX5_L3T_TYPE_WORD:
1425 case MLX5_L3T_TYPE_DWORD:
1429 case MLX5_L3T_TYPE_QWORD:
/* The entry data is not ready, use the user callback to create it. */
1440 ret = cb(ctx, data);
/* Save the newly allocated data to the entry. */
1444 ret = __l3t_set_entry(tbl, idx, data);
1446 rte_spinlock_unlock(&tbl->sl);