0be778935fadf8d53d5e0bb58f6daab69a2c8a35
[dpdk.git] / drivers / net / mlx5 / mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6
7 #include <mlx5_malloc.h>
8
9 #include "mlx5_utils.h"
10
11
12 /********************* mlx5 list ************************/
13
14 struct mlx5_list *
15 mlx5_list_create(const char *name, void *ctx,
16                  mlx5_list_create_cb cb_create,
17                  mlx5_list_match_cb cb_match,
18                  mlx5_list_remove_cb cb_remove,
19                  mlx5_list_clone_cb cb_clone,
20                  mlx5_list_clone_free_cb cb_clone_free)
21 {
22         struct mlx5_list *list;
23         int i;
24
25         if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
26             !cb_clone_free) {
27                 rte_errno = EINVAL;
28                 return NULL;
29         }
30         list = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*list), 0, SOCKET_ID_ANY);
31         if (!list)
32                 return NULL;
33         if (name)
34                 snprintf(list->name, sizeof(list->name), "%s", name);
35         list->ctx = ctx;
36         list->cb_create = cb_create;
37         list->cb_match = cb_match;
38         list->cb_remove = cb_remove;
39         list->cb_clone = cb_clone;
40         list->cb_clone_free = cb_clone_free;
41         rte_rwlock_init(&list->lock);
42         DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
43         for (i = 0; i <= RTE_MAX_LCORE; i++)
44                 LIST_INIT(&list->cache[i].h);
45         return list;
46 }
47
48 static struct mlx5_list_entry *
49 __list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
50 {
51         struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
52         uint32_t ret;
53
54         while (entry != NULL) {
55                 if (list->cb_match(list, entry, ctx) == 0) {
56                         if (reuse) {
57                                 ret = __atomic_add_fetch(&entry->ref_cnt, 1,
58                                                          __ATOMIC_RELAXED) - 1;
59                                 DRV_LOG(DEBUG, "mlx5 list %s entry %p ref: %u.",
60                                         list->name, (void *)entry,
61                                         entry->ref_cnt);
62                         } else if (lcore_index < RTE_MAX_LCORE) {
63                                 ret = __atomic_load_n(&entry->ref_cnt,
64                                                       __ATOMIC_RELAXED);
65                         }
66                         if (likely(ret != 0 || lcore_index == RTE_MAX_LCORE))
67                                 return entry;
68                         if (reuse && ret == 0)
69                                 entry->ref_cnt--; /* Invalid entry. */
70                 }
71                 entry = LIST_NEXT(entry, next);
72         }
73         return NULL;
74 }
75
76 struct mlx5_list_entry *
77 mlx5_list_lookup(struct mlx5_list *list, void *ctx)
78 {
79         struct mlx5_list_entry *entry = NULL;
80         int i;
81
82         rte_rwlock_read_lock(&list->lock);
83         for (i = 0; i < RTE_MAX_LCORE; i++) {
84                 entry = __list_lookup(list, i, ctx, false);
85                 if (entry)
86                         break;
87         }
88         rte_rwlock_read_unlock(&list->lock);
89         return entry;
90 }
91
92 static struct mlx5_list_entry *
93 mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
94                        struct mlx5_list_entry *gentry, void *ctx)
95 {
96         struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
97
98         if (unlikely(!lentry))
99                 return NULL;
100         lentry->ref_cnt = 1u;
101         lentry->gentry = gentry;
102         lentry->lcore_idx = (uint32_t)lcore_index;
103         LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
104         return lentry;
105 }
106
107 static void
108 __list_cache_clean(struct mlx5_list *list, int lcore_index)
109 {
110         struct mlx5_list_cache *c = &list->cache[lcore_index];
111         struct mlx5_list_entry *entry = LIST_FIRST(&c->h);
112         uint32_t inv_cnt = __atomic_exchange_n(&c->inv_cnt, 0,
113                                                __ATOMIC_RELAXED);
114
115         while (inv_cnt != 0 && entry != NULL) {
116                 struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
117
118                 if (__atomic_load_n(&entry->ref_cnt, __ATOMIC_RELAXED) == 0) {
119                         LIST_REMOVE(entry, next);
120                         list->cb_clone_free(list, entry);
121                         inv_cnt--;
122                 }
123                 entry = nentry;
124         }
125 }
126
127 struct mlx5_list_entry *
128 mlx5_list_register(struct mlx5_list *list, void *ctx)
129 {
130         struct mlx5_list_entry *entry, *local_entry;
131         volatile uint32_t prev_gen_cnt = 0;
132         int lcore_index = rte_lcore_index(rte_lcore_id());
133
134         MLX5_ASSERT(list);
135         MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
136         if (unlikely(lcore_index == -1)) {
137                 rte_errno = ENOTSUP;
138                 return NULL;
139         }
140         /* 0. Free entries that was invalidated by other lcores. */
141         __list_cache_clean(list, lcore_index);
142         /* 1. Lookup in local cache. */
143         local_entry = __list_lookup(list, lcore_index, ctx, true);
144         if (local_entry)
145                 return local_entry;
146         /* 2. Lookup with read lock on global list, reuse if found. */
147         rte_rwlock_read_lock(&list->lock);
148         entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
149         if (likely(entry)) {
150                 rte_rwlock_read_unlock(&list->lock);
151                 return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
152         }
153         prev_gen_cnt = list->gen_cnt;
154         rte_rwlock_read_unlock(&list->lock);
155         /* 3. Prepare new entry for global list and for cache. */
156         entry = list->cb_create(list, entry, ctx);
157         if (unlikely(!entry))
158                 return NULL;
159         local_entry = list->cb_clone(list, entry, ctx);
160         if (unlikely(!local_entry)) {
161                 list->cb_remove(list, entry);
162                 return NULL;
163         }
164         entry->ref_cnt = 1u;
165         local_entry->ref_cnt = 1u;
166         local_entry->gentry = entry;
167         local_entry->lcore_idx = (uint32_t)lcore_index;
168         rte_rwlock_write_lock(&list->lock);
169         /* 4. Make sure the same entry was not created before the write lock. */
170         if (unlikely(prev_gen_cnt != list->gen_cnt)) {
171                 struct mlx5_list_entry *oentry = __list_lookup(list,
172                                                                RTE_MAX_LCORE,
173                                                                ctx, true);
174
175                 if (unlikely(oentry)) {
176                         /* 4.5. Found real race!!, reuse the old entry. */
177                         rte_rwlock_write_unlock(&list->lock);
178                         list->cb_remove(list, entry);
179                         list->cb_clone_free(list, local_entry);
180                         return mlx5_list_cache_insert(list, lcore_index, oentry,
181                                                       ctx);
182                 }
183         }
184         /* 5. Update lists. */
185         LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry, next);
186         list->gen_cnt++;
187         rte_rwlock_write_unlock(&list->lock);
188         LIST_INSERT_HEAD(&list->cache[lcore_index].h, local_entry, next);
189         __atomic_add_fetch(&list->count, 1, __ATOMIC_RELAXED);
190         DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.", list->name,
191                 (void *)entry, entry->ref_cnt);
192         return local_entry;
193 }
194
195 int
196 mlx5_list_unregister(struct mlx5_list *list,
197                       struct mlx5_list_entry *entry)
198 {
199         struct mlx5_list_entry *gentry = entry->gentry;
200         int lcore_idx;
201
202         if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_RELAXED) != 0)
203                 return 1;
204         lcore_idx = rte_lcore_index(rte_lcore_id());
205         MLX5_ASSERT(lcore_idx < RTE_MAX_LCORE);
206         if (entry->lcore_idx == (uint32_t)lcore_idx) {
207                 LIST_REMOVE(entry, next);
208                 list->cb_clone_free(list, entry);
209         } else if (likely(lcore_idx != -1)) {
210                 __atomic_add_fetch(&list->cache[entry->lcore_idx].inv_cnt, 1,
211                                    __ATOMIC_RELAXED);
212         } else {
213                 return 0;
214         }
215         if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_RELAXED) != 0)
216                 return 1;
217         rte_rwlock_write_lock(&list->lock);
218         if (likely(gentry->ref_cnt == 0)) {
219                 LIST_REMOVE(gentry, next);
220                 rte_rwlock_write_unlock(&list->lock);
221                 list->cb_remove(list, gentry);
222                 __atomic_sub_fetch(&list->count, 1, __ATOMIC_RELAXED);
223                 DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
224                         list->name, (void *)gentry);
225                 return 0;
226         }
227         rte_rwlock_write_unlock(&list->lock);
228         return 1;
229 }
230
231 void
232 mlx5_list_destroy(struct mlx5_list *list)
233 {
234         struct mlx5_list_entry *entry;
235         int i;
236
237         MLX5_ASSERT(list);
238         for (i = 0; i <= RTE_MAX_LCORE; i++) {
239                 while (!LIST_EMPTY(&list->cache[i].h)) {
240                         entry = LIST_FIRST(&list->cache[i].h);
241                         LIST_REMOVE(entry, next);
242                         if (i == RTE_MAX_LCORE) {
243                                 list->cb_remove(list, entry);
244                                 DRV_LOG(DEBUG, "mlx5 list %s entry %p "
245                                         "destroyed.", list->name,
246                                         (void *)entry);
247                         } else {
248                                 list->cb_clone_free(list, entry);
249                         }
250                 }
251         }
252         mlx5_free(list);
253 }
254
255 uint32_t
256 mlx5_list_get_entry_num(struct mlx5_list *list)
257 {
258         MLX5_ASSERT(list);
259         return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
260 }
261
262 /********************* Indexed pool **********************/
263
264 static inline void
265 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
266 {
267         if (pool->cfg.need_lock)
268                 rte_spinlock_lock(&pool->rsz_lock);
269 }
270
271 static inline void
272 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
273 {
274         if (pool->cfg.need_lock)
275                 rte_spinlock_unlock(&pool->rsz_lock);
276 }
277
278 static inline uint32_t
279 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
280 {
281         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
282         uint32_t trunk_idx = 0;
283         uint32_t i;
284
285         if (!cfg->grow_trunk)
286                 return entry_idx / cfg->trunk_size;
287         if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
288                 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
289                             (cfg->trunk_size << (cfg->grow_shift *
290                             cfg->grow_trunk)) + cfg->grow_trunk;
291         } else {
292                 for (i = 0; i < cfg->grow_trunk; i++) {
293                         if (entry_idx < pool->grow_tbl[i])
294                                 break;
295                 }
296                 trunk_idx = i;
297         }
298         return trunk_idx;
299 }
300
301 static inline uint32_t
302 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
303 {
304         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
305
306         return cfg->trunk_size << (cfg->grow_shift *
307                (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
308 }
309
310 static inline uint32_t
311 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
312 {
313         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
314         uint32_t offset = 0;
315
316         if (!trunk_idx)
317                 return 0;
318         if (!cfg->grow_trunk)
319                 return cfg->trunk_size * trunk_idx;
320         if (trunk_idx < cfg->grow_trunk)
321                 offset = pool->grow_tbl[trunk_idx - 1];
322         else
323                 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
324                          (cfg->trunk_size << (cfg->grow_shift *
325                          cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
326         return offset;
327 }
328
329 struct mlx5_indexed_pool *
330 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
331 {
332         struct mlx5_indexed_pool *pool;
333         uint32_t i;
334
335         if (!cfg || (!cfg->malloc ^ !cfg->free) ||
336             (cfg->per_core_cache && cfg->release_mem_en) ||
337             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
338             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
339                 return NULL;
340         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
341                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
342                            SOCKET_ID_ANY);
343         if (!pool)
344                 return NULL;
345         pool->cfg = *cfg;
346         if (!pool->cfg.trunk_size)
347                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
348         if (!cfg->malloc && !cfg->free) {
349                 pool->cfg.malloc = mlx5_malloc;
350                 pool->cfg.free = mlx5_free;
351         }
352         if (pool->cfg.need_lock)
353                 rte_spinlock_init(&pool->rsz_lock);
354         /*
355          * Initialize the dynamic grow trunk size lookup table to have a quick
356          * lookup for the trunk entry index offset.
357          */
358         for (i = 0; i < cfg->grow_trunk; i++) {
359                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
360                 if (i > 0)
361                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
362         }
363         if (!pool->cfg.max_idx)
364                 pool->cfg.max_idx =
365                         mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
366         if (!cfg->per_core_cache)
367                 pool->free_list = TRUNK_INVALID;
368         rte_spinlock_init(&pool->lcore_lock);
369         return pool;
370 }
371
372 static int
373 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
374 {
375         struct mlx5_indexed_trunk *trunk;
376         struct mlx5_indexed_trunk **trunk_tmp;
377         struct mlx5_indexed_trunk **p;
378         size_t trunk_size = 0;
379         size_t data_size;
380         size_t bmp_size;
381         uint32_t idx, cur_max_idx, i;
382
383         cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
384         if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
385             cur_max_idx >= pool->cfg.max_idx)
386                 return -ENOMEM;
387         if (pool->n_trunk_valid == pool->n_trunk) {
388                 /* No free trunk flags, expand trunk list. */
389                 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
390                              RTE_CACHE_LINE_SIZE / sizeof(void *);
391
392                 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
393                                      sizeof(struct mlx5_indexed_trunk *),
394                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
395                 if (!p)
396                         return -ENOMEM;
397                 if (pool->trunks)
398                         memcpy(p, pool->trunks, pool->n_trunk_valid *
399                                sizeof(struct mlx5_indexed_trunk *));
400                 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
401                        n_grow * sizeof(void *));
402                 trunk_tmp = pool->trunks;
403                 pool->trunks = p;
404                 if (trunk_tmp)
405                         pool->cfg.free(trunk_tmp);
406                 pool->n_trunk += n_grow;
407         }
408         if (!pool->cfg.release_mem_en) {
409                 idx = pool->n_trunk_valid;
410         } else {
411                 /* Find the first available slot in trunk list */
412                 for (idx = 0; idx < pool->n_trunk; idx++)
413                         if (pool->trunks[idx] == NULL)
414                                 break;
415         }
416         trunk_size += sizeof(*trunk);
417         data_size = mlx5_trunk_size_get(pool, idx);
418         bmp_size = rte_bitmap_get_memory_footprint(data_size);
419         /* rte_bitmap requires memory cacheline aligned. */
420         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
421         trunk_size += bmp_size;
422         trunk = pool->cfg.malloc(0, trunk_size,
423                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
424         if (!trunk)
425                 return -ENOMEM;
426         pool->trunks[idx] = trunk;
427         trunk->idx = idx;
428         trunk->free = data_size;
429         trunk->prev = TRUNK_INVALID;
430         trunk->next = TRUNK_INVALID;
431         MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
432         pool->free_list = idx;
433         /* Mark all entries as available. */
434         trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
435                      [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
436                      bmp_size);
437         /* Clear the overhead bits in the trunk if it happens. */
438         if (cur_max_idx + data_size > pool->cfg.max_idx) {
439                 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
440                         rte_bitmap_clear(trunk->bmp, i);
441         }
442         MLX5_ASSERT(trunk->bmp);
443         pool->n_trunk_valid++;
444 #ifdef POOL_DEBUG
445         pool->trunk_new++;
446         pool->trunk_avail++;
447 #endif
448         return 0;
449 }
450
451 static inline struct mlx5_indexed_cache *
452 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
453 {
454         struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
455
456         lc = pool->cache[cidx]->lc;
457         gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
458         if (gc && lc != gc) {
459                 mlx5_ipool_lock(pool);
460                 if (lc && !(--lc->ref_cnt))
461                         olc = lc;
462                 lc = pool->gc;
463                 lc->ref_cnt++;
464                 pool->cache[cidx]->lc = lc;
465                 mlx5_ipool_unlock(pool);
466                 if (olc)
467                         pool->cfg.free(olc);
468         }
469         return lc;
470 }
471
472 static uint32_t
473 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
474 {
475         struct mlx5_indexed_trunk *trunk;
476         struct mlx5_indexed_cache *p, *lc, *olc = NULL;
477         size_t trunk_size = 0;
478         size_t data_size;
479         uint32_t cur_max_idx, trunk_idx, trunk_n;
480         uint32_t fetch_size, ts_idx, i;
481         int n_grow;
482
483 check_again:
484         p = NULL;
485         fetch_size = 0;
486         /*
487          * Fetch new index from global if possible. First round local
488          * cache will be NULL.
489          */
490         lc = pool->cache[cidx]->lc;
491         mlx5_ipool_lock(pool);
492         /* Try to update local cache first. */
493         if (likely(pool->gc)) {
494                 if (lc != pool->gc) {
495                         if (lc && !(--lc->ref_cnt))
496                                 olc = lc;
497                         lc = pool->gc;
498                         lc->ref_cnt++;
499                         pool->cache[cidx]->lc = lc;
500                 }
501                 if (lc->len) {
502                         /* Use the updated local cache to fetch index. */
503                         fetch_size = pool->cfg.per_core_cache >> 2;
504                         if (lc->len < fetch_size)
505                                 fetch_size = lc->len;
506                         lc->len -= fetch_size;
507                         memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
508                                sizeof(uint32_t) * fetch_size);
509                 }
510         }
511         mlx5_ipool_unlock(pool);
512         if (unlikely(olc)) {
513                 pool->cfg.free(olc);
514                 olc = NULL;
515         }
516         if (fetch_size) {
517                 pool->cache[cidx]->len = fetch_size - 1;
518                 return pool->cache[cidx]->idx[pool->cache[cidx]->len];
519         }
520         trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
521                          __ATOMIC_ACQUIRE) : 0;
522         trunk_n = lc ? lc->n_trunk : 0;
523         cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
524         /* Check if index reach maximum. */
525         if (trunk_idx == TRUNK_MAX_IDX ||
526             cur_max_idx >= pool->cfg.max_idx)
527                 return 0;
528         /* No enough space in trunk array, resize the trunks array. */
529         if (trunk_idx == trunk_n) {
530                 n_grow = trunk_idx ? trunk_idx :
531                              RTE_CACHE_LINE_SIZE / sizeof(void *);
532                 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
533                 /* Resize the trunk array. */
534                 p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
535                         sizeof(struct mlx5_indexed_trunk *)) +
536                         (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
537                         RTE_CACHE_LINE_SIZE, rte_socket_id());
538                 if (!p)
539                         return 0;
540                 p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
541                 if (lc)
542                         memcpy(p->trunks, lc->trunks, trunk_idx *
543                        sizeof(struct mlx5_indexed_trunk *));
544 #ifdef RTE_LIBRTE_MLX5_DEBUG
545                 memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
546                         n_grow * sizeof(void *));
547 #endif
548                 p->n_trunk_valid = trunk_idx;
549                 p->n_trunk = trunk_n + n_grow;
550                 p->len = 0;
551         }
552         /* Prepare the new trunk. */
553         trunk_size = sizeof(*trunk);
554         data_size = mlx5_trunk_size_get(pool, trunk_idx);
555         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
556         trunk = pool->cfg.malloc(0, trunk_size,
557                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
558         if (unlikely(!trunk)) {
559                 pool->cfg.free(p);
560                 return 0;
561         }
562         trunk->idx = trunk_idx;
563         trunk->free = data_size;
564         mlx5_ipool_lock(pool);
565         /*
566          * Double check if trunks has been updated or have available index.
567          * During the new trunk allocate, index may still be flushed to the
568          * global cache. So also need to check the pool->gc->len.
569          */
570         if (pool->gc && (lc != pool->gc ||
571             lc->n_trunk_valid != trunk_idx ||
572             pool->gc->len)) {
573                 mlx5_ipool_unlock(pool);
574                 if (p)
575                         pool->cfg.free(p);
576                 pool->cfg.free(trunk);
577                 goto check_again;
578         }
579         /* Resize the trunk array and update local cache first.  */
580         if (p) {
581                 if (lc && !(--lc->ref_cnt))
582                         olc = lc;
583                 lc = p;
584                 lc->ref_cnt = 1;
585                 pool->cache[cidx]->lc = lc;
586                 __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
587         }
588         /* Add trunk to trunks array. */
589         lc->trunks[trunk_idx] = trunk;
590         __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
591         /* Enqueue half of the index to global. */
592         ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
593         fetch_size = trunk->free >> 1;
594         for (i = 0; i < fetch_size; i++)
595                 lc->idx[i] = ts_idx + i;
596         lc->len = fetch_size;
597         mlx5_ipool_unlock(pool);
598         /* Copy left half - 1 to local cache index array. */
599         pool->cache[cidx]->len = trunk->free - fetch_size - 1;
600         ts_idx += fetch_size;
601         for (i = 0; i < pool->cache[cidx]->len; i++)
602                 pool->cache[cidx]->idx[i] = ts_idx + i;
603         if (olc)
604                 pool->cfg.free(olc);
605         return ts_idx + i;
606 }
607
608 static void *
609 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
610 {
611         struct mlx5_indexed_trunk *trunk;
612         struct mlx5_indexed_cache *lc;
613         uint32_t trunk_idx;
614         uint32_t entry_idx;
615
616         MLX5_ASSERT(idx);
617         if (unlikely(!pool->cache[cidx])) {
618                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
619                         sizeof(struct mlx5_ipool_per_lcore) +
620                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
621                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
622                 if (!pool->cache[cidx]) {
623                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
624                         return NULL;
625                 }
626         }
627         lc = mlx5_ipool_update_global_cache(pool, cidx);
628         idx -= 1;
629         trunk_idx = mlx5_trunk_idx_get(pool, idx);
630         trunk = lc->trunks[trunk_idx];
631         MLX5_ASSERT(trunk);
632         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
633         return &trunk->data[entry_idx * pool->cfg.size];
634 }
635
636 static void *
637 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
638 {
639         void *entry;
640         int cidx;
641
642         cidx = rte_lcore_index(rte_lcore_id());
643         if (unlikely(cidx == -1)) {
644                 cidx = RTE_MAX_LCORE;
645                 rte_spinlock_lock(&pool->lcore_lock);
646         }
647         entry = _mlx5_ipool_get_cache(pool, cidx, idx);
648         if (unlikely(cidx == RTE_MAX_LCORE))
649                 rte_spinlock_unlock(&pool->lcore_lock);
650         return entry;
651 }
652
653
654 static void *
655 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
656                          uint32_t *idx)
657 {
658         if (unlikely(!pool->cache[cidx])) {
659                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
660                         sizeof(struct mlx5_ipool_per_lcore) +
661                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
662                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
663                 if (!pool->cache[cidx]) {
664                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
665                         return NULL;
666                 }
667         } else if (pool->cache[cidx]->len) {
668                 pool->cache[cidx]->len--;
669                 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
670                 return _mlx5_ipool_get_cache(pool, cidx, *idx);
671         }
672         /* Not enough idx in global cache. Keep fetching from global. */
673         *idx = mlx5_ipool_allocate_from_global(pool, cidx);
674         if (unlikely(!(*idx)))
675                 return NULL;
676         return _mlx5_ipool_get_cache(pool, cidx, *idx);
677 }
678
679 static void *
680 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
681 {
682         void *entry;
683         int cidx;
684
685         cidx = rte_lcore_index(rte_lcore_id());
686         if (unlikely(cidx == -1)) {
687                 cidx = RTE_MAX_LCORE;
688                 rte_spinlock_lock(&pool->lcore_lock);
689         }
690         entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
691         if (unlikely(cidx == RTE_MAX_LCORE))
692                 rte_spinlock_unlock(&pool->lcore_lock);
693         return entry;
694 }
695
696 static void
697 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
698 {
699         struct mlx5_ipool_per_lcore *ilc;
700         struct mlx5_indexed_cache *gc, *olc = NULL;
701         uint32_t reclaim_num = 0;
702
703         MLX5_ASSERT(idx);
704         /*
705          * When index was allocated on core A but freed on core B. In this
706          * case check if local cache on core B was allocated before.
707          */
708         if (unlikely(!pool->cache[cidx])) {
709                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
710                         sizeof(struct mlx5_ipool_per_lcore) +
711                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
712                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
713                 if (!pool->cache[cidx]) {
714                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
715                         return;
716                 }
717         }
718         /* Try to enqueue to local index cache. */
719         if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
720                 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
721                 pool->cache[cidx]->len++;
722                 return;
723         }
724         ilc = pool->cache[cidx];
725         reclaim_num = pool->cfg.per_core_cache >> 2;
726         ilc->len -= reclaim_num;
727         /* Local index cache full, try with global index cache. */
728         mlx5_ipool_lock(pool);
729         gc = pool->gc;
730         if (ilc->lc != gc) {
731                 if (!(--ilc->lc->ref_cnt))
732                         olc = ilc->lc;
733                 gc->ref_cnt++;
734                 ilc->lc = gc;
735         }
736         memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
737                reclaim_num * sizeof(uint32_t));
738         gc->len += reclaim_num;
739         mlx5_ipool_unlock(pool);
740         if (olc)
741                 pool->cfg.free(olc);
742         pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
743         pool->cache[cidx]->len++;
744 }
745
746 static void
747 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
748 {
749         int cidx;
750
751         cidx = rte_lcore_index(rte_lcore_id());
752         if (unlikely(cidx == -1)) {
753                 cidx = RTE_MAX_LCORE;
754                 rte_spinlock_lock(&pool->lcore_lock);
755         }
756         _mlx5_ipool_free_cache(pool, cidx, idx);
757         if (unlikely(cidx == RTE_MAX_LCORE))
758                 rte_spinlock_unlock(&pool->lcore_lock);
759 }
760
761 void *
762 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
763 {
764         struct mlx5_indexed_trunk *trunk;
765         uint64_t slab = 0;
766         uint32_t iidx = 0;
767         void *p;
768
769         if (pool->cfg.per_core_cache)
770                 return mlx5_ipool_malloc_cache(pool, idx);
771         mlx5_ipool_lock(pool);
772         if (pool->free_list == TRUNK_INVALID) {
773                 /* If no available trunks, grow new. */
774                 if (mlx5_ipool_grow(pool)) {
775                         mlx5_ipool_unlock(pool);
776                         return NULL;
777                 }
778         }
779         MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
780         trunk = pool->trunks[pool->free_list];
781         MLX5_ASSERT(trunk->free);
782         if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
783                 mlx5_ipool_unlock(pool);
784                 return NULL;
785         }
786         MLX5_ASSERT(slab);
787         iidx += __builtin_ctzll(slab);
788         MLX5_ASSERT(iidx != UINT32_MAX);
789         MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
790         rte_bitmap_clear(trunk->bmp, iidx);
791         p = &trunk->data[iidx * pool->cfg.size];
792         /*
793          * The ipool index should grow continually from small to big,
794          * some features as metering only accept limited bits of index.
795          * Random index with MSB set may be rejected.
796          */
797         iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
798         iidx += 1; /* non-zero index. */
799         trunk->free--;
800 #ifdef POOL_DEBUG
801         pool->n_entry++;
802 #endif
803         if (!trunk->free) {
804                 /* Full trunk will be removed from free list in imalloc. */
805                 MLX5_ASSERT(pool->free_list == trunk->idx);
806                 pool->free_list = trunk->next;
807                 if (trunk->next != TRUNK_INVALID)
808                         pool->trunks[trunk->next]->prev = TRUNK_INVALID;
809                 trunk->prev = TRUNK_INVALID;
810                 trunk->next = TRUNK_INVALID;
811 #ifdef POOL_DEBUG
812                 pool->trunk_empty++;
813                 pool->trunk_avail--;
814 #endif
815         }
816         *idx = iidx;
817         mlx5_ipool_unlock(pool);
818         return p;
819 }
820
821 void *
822 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
823 {
824         void *entry = mlx5_ipool_malloc(pool, idx);
825
826         if (entry && pool->cfg.size)
827                 memset(entry, 0, pool->cfg.size);
828         return entry;
829 }
830
831 void
832 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
833 {
834         struct mlx5_indexed_trunk *trunk;
835         uint32_t trunk_idx;
836         uint32_t entry_idx;
837
838         if (!idx)
839                 return;
840         if (pool->cfg.per_core_cache) {
841                 mlx5_ipool_free_cache(pool, idx);
842                 return;
843         }
844         idx -= 1;
845         mlx5_ipool_lock(pool);
846         trunk_idx = mlx5_trunk_idx_get(pool, idx);
847         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
848             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
849                 goto out;
850         trunk = pool->trunks[trunk_idx];
851         if (!trunk)
852                 goto out;
853         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
854         if (trunk_idx != trunk->idx ||
855             rte_bitmap_get(trunk->bmp, entry_idx))
856                 goto out;
857         rte_bitmap_set(trunk->bmp, entry_idx);
858         trunk->free++;
859         if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
860            (pool, trunk->idx)) {
861                 if (pool->free_list == trunk->idx)
862                         pool->free_list = trunk->next;
863                 if (trunk->next != TRUNK_INVALID)
864                         pool->trunks[trunk->next]->prev = trunk->prev;
865                 if (trunk->prev != TRUNK_INVALID)
866                         pool->trunks[trunk->prev]->next = trunk->next;
867                 pool->cfg.free(trunk);
868                 pool->trunks[trunk_idx] = NULL;
869                 pool->n_trunk_valid--;
870 #ifdef POOL_DEBUG
871                 pool->trunk_avail--;
872                 pool->trunk_free++;
873 #endif
874                 if (pool->n_trunk_valid == 0) {
875                         pool->cfg.free(pool->trunks);
876                         pool->trunks = NULL;
877                         pool->n_trunk = 0;
878                 }
879         } else if (trunk->free == 1) {
880                 /* Put into free trunk list head. */
881                 MLX5_ASSERT(pool->free_list != trunk->idx);
882                 trunk->next = pool->free_list;
883                 trunk->prev = TRUNK_INVALID;
884                 if (pool->free_list != TRUNK_INVALID)
885                         pool->trunks[pool->free_list]->prev = trunk->idx;
886                 pool->free_list = trunk->idx;
887 #ifdef POOL_DEBUG
888                 pool->trunk_empty--;
889                 pool->trunk_avail++;
890 #endif
891         }
892 #ifdef POOL_DEBUG
893         pool->n_entry--;
894 #endif
895 out:
896         mlx5_ipool_unlock(pool);
897 }
898
899 void *
900 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
901 {
902         struct mlx5_indexed_trunk *trunk;
903         void *p = NULL;
904         uint32_t trunk_idx;
905         uint32_t entry_idx;
906
907         if (!idx)
908                 return NULL;
909         if (pool->cfg.per_core_cache)
910                 return mlx5_ipool_get_cache(pool, idx);
911         idx -= 1;
912         mlx5_ipool_lock(pool);
913         trunk_idx = mlx5_trunk_idx_get(pool, idx);
914         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
915             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
916                 goto out;
917         trunk = pool->trunks[trunk_idx];
918         if (!trunk)
919                 goto out;
920         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
921         if (trunk_idx != trunk->idx ||
922             rte_bitmap_get(trunk->bmp, entry_idx))
923                 goto out;
924         p = &trunk->data[entry_idx * pool->cfg.size];
925 out:
926         mlx5_ipool_unlock(pool);
927         return p;
928 }
929
930 int
931 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
932 {
933         struct mlx5_indexed_trunk **trunks = NULL;
934         struct mlx5_indexed_cache *gc = pool->gc;
935         uint32_t i, n_trunk_valid = 0;
936
937         MLX5_ASSERT(pool);
938         mlx5_ipool_lock(pool);
939         if (pool->cfg.per_core_cache) {
940                 for (i = 0; i <= RTE_MAX_LCORE; i++) {
941                         /*
942                          * Free only old global cache. Pool gc will be
943                          * freed at last.
944                          */
945                         if (pool->cache[i]) {
946                                 if (pool->cache[i]->lc &&
947                                     pool->cache[i]->lc != pool->gc &&
948                                     (!(--pool->cache[i]->lc->ref_cnt)))
949                                         pool->cfg.free(pool->cache[i]->lc);
950                                 pool->cfg.free(pool->cache[i]);
951                         }
952                 }
953                 if (gc) {
954                         trunks = gc->trunks;
955                         n_trunk_valid = gc->n_trunk_valid;
956                 }
957         } else {
958                 gc = NULL;
959                 trunks = pool->trunks;
960                 n_trunk_valid = pool->n_trunk_valid;
961         }
962         for (i = 0; i < n_trunk_valid; i++) {
963                 if (trunks[i])
964                         pool->cfg.free(trunks[i]);
965         }
966         if (!gc && trunks)
967                 pool->cfg.free(trunks);
968         if (gc)
969                 pool->cfg.free(gc);
970         mlx5_ipool_unlock(pool);
971         mlx5_free(pool);
972         return 0;
973 }
974
975 void
976 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
977 {
978         uint32_t i, j;
979         struct mlx5_indexed_cache *gc;
980         struct rte_bitmap *ibmp;
981         uint32_t bmp_num, mem_size;
982
983         if (!pool->cfg.per_core_cache)
984                 return;
985         gc = pool->gc;
986         if (!gc)
987                 return;
988         /* Reset bmp. */
989         bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
990         mem_size = rte_bitmap_get_memory_footprint(bmp_num);
991         pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
992                                          RTE_CACHE_LINE_SIZE, rte_socket_id());
993         if (!pool->bmp_mem) {
994                 DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
995                 return;
996         }
997         ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
998         if (!ibmp) {
999                 pool->cfg.free(pool->bmp_mem);
1000                 pool->bmp_mem = NULL;
1001                 DRV_LOG(ERR, "Ipool bitmap create failed.\n");
1002                 return;
1003         }
1004         pool->ibmp = ibmp;
1005         /* Clear global cache. */
1006         for (i = 0; i < gc->len; i++)
1007                 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
1008         /* Clear core cache. */
1009         for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
1010                 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
1011
1012                 if (!ilc)
1013                         continue;
1014                 for (j = 0; j < ilc->len; j++)
1015                         rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
1016         }
1017 }
1018
1019 static void *
1020 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
1021 {
1022         struct rte_bitmap *ibmp;
1023         uint64_t slab = 0;
1024         uint32_t iidx = *pos;
1025
1026         ibmp = pool->ibmp;
1027         if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
1028                 if (pool->bmp_mem) {
1029                         pool->cfg.free(pool->bmp_mem);
1030                         pool->bmp_mem = NULL;
1031                         pool->ibmp = NULL;
1032                 }
1033                 return NULL;
1034         }
1035         iidx += __builtin_ctzll(slab);
1036         rte_bitmap_clear(ibmp, iidx);
1037         iidx++;
1038         *pos = iidx;
1039         return mlx5_ipool_get_cache(pool, iidx);
1040 }
1041
1042 void *
1043 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
1044 {
1045         uint32_t idx = *pos;
1046         void *entry;
1047
1048         if (pool->cfg.per_core_cache)
1049                 return mlx5_ipool_get_next_cache(pool, pos);
1050         while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
1051                 entry = mlx5_ipool_get(pool, idx);
1052                 if (entry) {
1053                         *pos = idx;
1054                         return entry;
1055                 }
1056                 idx++;
1057         }
1058         return NULL;
1059 }
1060
1061 void
1062 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
1063 {
1064         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
1065                "total: %d\n",
1066                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
1067                pool->cfg.trunk_size, pool->n_trunk_valid);
1068 #ifdef POOL_DEBUG
1069         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
1070                "available %u free %u\n",
1071                pool->cfg.type, pool->n_entry, pool->trunk_new,
1072                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
1073 #endif
1074 }
1075
1076 struct mlx5_l3t_tbl *
1077 mlx5_l3t_create(enum mlx5_l3t_type type)
1078 {
1079         struct mlx5_l3t_tbl *tbl;
1080         struct mlx5_indexed_pool_config l3t_ip_cfg = {
1081                 .trunk_size = 16,
1082                 .grow_trunk = 6,
1083                 .grow_shift = 1,
1084                 .need_lock = 0,
1085                 .release_mem_en = 1,
1086                 .malloc = mlx5_malloc,
1087                 .free = mlx5_free,
1088         };
1089
1090         if (type >= MLX5_L3T_TYPE_MAX) {
1091                 rte_errno = EINVAL;
1092                 return NULL;
1093         }
1094         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
1095                           SOCKET_ID_ANY);
1096         if (!tbl) {
1097                 rte_errno = ENOMEM;
1098                 return NULL;
1099         }
1100         tbl->type = type;
1101         switch (type) {
1102         case MLX5_L3T_TYPE_WORD:
1103                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
1104                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
1105                 break;
1106         case MLX5_L3T_TYPE_DWORD:
1107                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
1108                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
1109                 break;
1110         case MLX5_L3T_TYPE_QWORD:
1111                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
1112                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
1113                 break;
1114         default:
1115                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
1116                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
1117                 break;
1118         }
1119         rte_spinlock_init(&tbl->sl);
1120         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
1121         if (!tbl->eip) {
1122                 rte_errno = ENOMEM;
1123                 mlx5_free(tbl);
1124                 tbl = NULL;
1125         }
1126         return tbl;
1127 }
1128
1129 void
1130 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1131 {
1132         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1133         uint32_t i, j;
1134
1135         if (!tbl)
1136                 return;
1137         g_tbl = tbl->tbl;
1138         if (g_tbl) {
1139                 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1140                         m_tbl = g_tbl->tbl[i];
1141                         if (!m_tbl)
1142                                 continue;
1143                         for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1144                                 if (!m_tbl->tbl[j])
1145                                         continue;
1146                                 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1147                                             m_tbl->tbl[j])->ref_cnt);
1148                                 mlx5_ipool_free(tbl->eip,
1149                                                 ((struct mlx5_l3t_entry_word *)
1150                                                 m_tbl->tbl[j])->idx);
1151                                 m_tbl->tbl[j] = 0;
1152                                 if (!(--m_tbl->ref_cnt))
1153                                         break;
1154                         }
1155                         MLX5_ASSERT(!m_tbl->ref_cnt);
1156                         mlx5_free(g_tbl->tbl[i]);
1157                         g_tbl->tbl[i] = 0;
1158                         if (!(--g_tbl->ref_cnt))
1159                                 break;
1160                 }
1161                 MLX5_ASSERT(!g_tbl->ref_cnt);
1162                 mlx5_free(tbl->tbl);
1163                 tbl->tbl = 0;
1164         }
1165         mlx5_ipool_destroy(tbl->eip);
1166         mlx5_free(tbl);
1167 }
1168
1169 static int32_t
1170 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1171                 union mlx5_l3t_data *data)
1172 {
1173         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1174         struct mlx5_l3t_entry_word *w_e_tbl;
1175         struct mlx5_l3t_entry_dword *dw_e_tbl;
1176         struct mlx5_l3t_entry_qword *qw_e_tbl;
1177         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1178         void *e_tbl;
1179         uint32_t entry_idx;
1180
1181         g_tbl = tbl->tbl;
1182         if (!g_tbl)
1183                 return -1;
1184         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1185         if (!m_tbl)
1186                 return -1;
1187         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1188         if (!e_tbl)
1189                 return -1;
1190         entry_idx = idx & MLX5_L3T_ET_MASK;
1191         switch (tbl->type) {
1192         case MLX5_L3T_TYPE_WORD:
1193                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1194                 data->word = w_e_tbl->entry[entry_idx].data;
1195                 if (w_e_tbl->entry[entry_idx].data)
1196                         w_e_tbl->entry[entry_idx].ref_cnt++;
1197                 break;
1198         case MLX5_L3T_TYPE_DWORD:
1199                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1200                 data->dword = dw_e_tbl->entry[entry_idx].data;
1201                 if (dw_e_tbl->entry[entry_idx].data)
1202                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1203                 break;
1204         case MLX5_L3T_TYPE_QWORD:
1205                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1206                 data->qword = qw_e_tbl->entry[entry_idx].data;
1207                 if (qw_e_tbl->entry[entry_idx].data)
1208                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1209                 break;
1210         default:
1211                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1212                 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1213                 if (ptr_e_tbl->entry[entry_idx].data)
1214                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1215                 break;
1216         }
1217         return 0;
1218 }
1219
1220 int32_t
1221 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1222                    union mlx5_l3t_data *data)
1223 {
1224         int ret;
1225
1226         rte_spinlock_lock(&tbl->sl);
1227         ret = __l3t_get_entry(tbl, idx, data);
1228         rte_spinlock_unlock(&tbl->sl);
1229         return ret;
1230 }
1231
1232 int32_t
1233 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1234 {
1235         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1236         struct mlx5_l3t_entry_word *w_e_tbl;
1237         struct mlx5_l3t_entry_dword *dw_e_tbl;
1238         struct mlx5_l3t_entry_qword *qw_e_tbl;
1239         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1240         void *e_tbl;
1241         uint32_t entry_idx;
1242         uint64_t ref_cnt;
1243         int32_t ret = -1;
1244
1245         rte_spinlock_lock(&tbl->sl);
1246         g_tbl = tbl->tbl;
1247         if (!g_tbl)
1248                 goto out;
1249         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1250         if (!m_tbl)
1251                 goto out;
1252         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1253         if (!e_tbl)
1254                 goto out;
1255         entry_idx = idx & MLX5_L3T_ET_MASK;
1256         switch (tbl->type) {
1257         case MLX5_L3T_TYPE_WORD:
1258                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1259                 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1260                 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1261                 if (ret)
1262                         goto out;
1263                 w_e_tbl->entry[entry_idx].data = 0;
1264                 ref_cnt = --w_e_tbl->ref_cnt;
1265                 break;
1266         case MLX5_L3T_TYPE_DWORD:
1267                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1268                 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1269                 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1270                 if (ret)
1271                         goto out;
1272                 dw_e_tbl->entry[entry_idx].data = 0;
1273                 ref_cnt = --dw_e_tbl->ref_cnt;
1274                 break;
1275         case MLX5_L3T_TYPE_QWORD:
1276                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1277                 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1278                 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1279                 if (ret)
1280                         goto out;
1281                 qw_e_tbl->entry[entry_idx].data = 0;
1282                 ref_cnt = --qw_e_tbl->ref_cnt;
1283                 break;
1284         default:
1285                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1286                 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1287                 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1288                 if (ret)
1289                         goto out;
1290                 ptr_e_tbl->entry[entry_idx].data = NULL;
1291                 ref_cnt = --ptr_e_tbl->ref_cnt;
1292                 break;
1293         }
1294         if (!ref_cnt) {
1295                 mlx5_ipool_free(tbl->eip,
1296                                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1297                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1298                                                                         NULL;
1299                 if (!(--m_tbl->ref_cnt)) {
1300                         mlx5_free(m_tbl);
1301                         g_tbl->tbl
1302                         [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1303                         if (!(--g_tbl->ref_cnt)) {
1304                                 mlx5_free(g_tbl);
1305                                 tbl->tbl = 0;
1306                         }
1307                 }
1308         }
1309 out:
1310         rte_spinlock_unlock(&tbl->sl);
1311         return ret;
1312 }
1313
1314 static int32_t
1315 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1316                 union mlx5_l3t_data *data)
1317 {
1318         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1319         struct mlx5_l3t_entry_word *w_e_tbl;
1320         struct mlx5_l3t_entry_dword *dw_e_tbl;
1321         struct mlx5_l3t_entry_qword *qw_e_tbl;
1322         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1323         void *e_tbl;
1324         uint32_t entry_idx, tbl_idx = 0;
1325
1326         /* Check the global table, create it if empty. */
1327         g_tbl = tbl->tbl;
1328         if (!g_tbl) {
1329                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1330                                     sizeof(struct mlx5_l3t_level_tbl) +
1331                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1332                                     SOCKET_ID_ANY);
1333                 if (!g_tbl) {
1334                         rte_errno = ENOMEM;
1335                         return -1;
1336                 }
1337                 tbl->tbl = g_tbl;
1338         }
1339         /*
1340          * Check the middle table, create it if empty. Ref_cnt will be
1341          * increased if new sub table created.
1342          */
1343         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1344         if (!m_tbl) {
1345                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1346                                     sizeof(struct mlx5_l3t_level_tbl) +
1347                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1348                                     SOCKET_ID_ANY);
1349                 if (!m_tbl) {
1350                         rte_errno = ENOMEM;
1351                         return -1;
1352                 }
1353                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1354                                                                         m_tbl;
1355                 g_tbl->ref_cnt++;
1356         }
1357         /*
1358          * Check the entry table, create it if empty. Ref_cnt will be
1359          * increased if new sub entry table created.
1360          */
1361         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1362         if (!e_tbl) {
1363                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1364                 if (!e_tbl) {
1365                         rte_errno = ENOMEM;
1366                         return -1;
1367                 }
1368                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1369                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1370                                                                         e_tbl;
1371                 m_tbl->ref_cnt++;
1372         }
1373         entry_idx = idx & MLX5_L3T_ET_MASK;
1374         switch (tbl->type) {
1375         case MLX5_L3T_TYPE_WORD:
1376                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1377                 if (w_e_tbl->entry[entry_idx].data) {
1378                         data->word = w_e_tbl->entry[entry_idx].data;
1379                         w_e_tbl->entry[entry_idx].ref_cnt++;
1380                         rte_errno = EEXIST;
1381                         return -1;
1382                 }
1383                 w_e_tbl->entry[entry_idx].data = data->word;
1384                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1385                 w_e_tbl->ref_cnt++;
1386                 break;
1387         case MLX5_L3T_TYPE_DWORD:
1388                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1389                 if (dw_e_tbl->entry[entry_idx].data) {
1390                         data->dword = dw_e_tbl->entry[entry_idx].data;
1391                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1392                         rte_errno = EEXIST;
1393                         return -1;
1394                 }
1395                 dw_e_tbl->entry[entry_idx].data = data->dword;
1396                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1397                 dw_e_tbl->ref_cnt++;
1398                 break;
1399         case MLX5_L3T_TYPE_QWORD:
1400                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1401                 if (qw_e_tbl->entry[entry_idx].data) {
1402                         data->qword = qw_e_tbl->entry[entry_idx].data;
1403                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1404                         rte_errno = EEXIST;
1405                         return -1;
1406                 }
1407                 qw_e_tbl->entry[entry_idx].data = data->qword;
1408                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1409                 qw_e_tbl->ref_cnt++;
1410                 break;
1411         default:
1412                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1413                 if (ptr_e_tbl->entry[entry_idx].data) {
1414                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
1415                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1416                         rte_errno = EEXIST;
1417                         return -1;
1418                 }
1419                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1420                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1421                 ptr_e_tbl->ref_cnt++;
1422                 break;
1423         }
1424         return 0;
1425 }
1426
1427 int32_t
1428 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1429                    union mlx5_l3t_data *data)
1430 {
1431         int ret;
1432
1433         rte_spinlock_lock(&tbl->sl);
1434         ret = __l3t_set_entry(tbl, idx, data);
1435         rte_spinlock_unlock(&tbl->sl);
1436         return ret;
1437 }
1438
1439 int32_t
1440 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1441                        union mlx5_l3t_data *data,
1442                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
1443 {
1444         int32_t ret;
1445
1446         rte_spinlock_lock(&tbl->sl);
1447         /* Check if entry data is ready. */
1448         ret = __l3t_get_entry(tbl, idx, data);
1449         if (!ret) {
1450                 switch (tbl->type) {
1451                 case MLX5_L3T_TYPE_WORD:
1452                         if (data->word)
1453                                 goto out;
1454                         break;
1455                 case MLX5_L3T_TYPE_DWORD:
1456                         if (data->dword)
1457                                 goto out;
1458                         break;
1459                 case MLX5_L3T_TYPE_QWORD:
1460                         if (data->qword)
1461                                 goto out;
1462                         break;
1463                 default:
1464                         if (data->ptr)
1465                                 goto out;
1466                         break;
1467                 }
1468         }
1469         /* Entry data is not ready, use user callback to create it. */
1470         ret = cb(ctx, data);
1471         if (ret)
1472                 goto out;
1473         /* Save the new allocated data to entry. */
1474         ret = __l3t_set_entry(tbl, idx, data);
1475 out:
1476         rte_spinlock_unlock(&tbl->sl);
1477         return ret;
1478 }