32f8d650736a6c4b6f185f7955c1813eb25c78c2
[dpdk.git] / drivers / net / mlx5 / mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6
7 #include <mlx5_malloc.h>
8
9 #include "mlx5_utils.h"
10
11
12 /********************* Cache list ************************/
13
14 static struct mlx5_cache_entry *
15 mlx5_clist_default_create_cb(struct mlx5_cache_list *list,
16                              struct mlx5_cache_entry *entry __rte_unused,
17                              void *ctx __rte_unused)
18 {
19         return mlx5_malloc(MLX5_MEM_ZERO, list->entry_sz, 0, SOCKET_ID_ANY);
20 }
21
22 static void
23 mlx5_clist_default_remove_cb(struct mlx5_cache_list *list __rte_unused,
24                              struct mlx5_cache_entry *entry)
25 {
26         mlx5_free(entry);
27 }
28
29 int
30 mlx5_cache_list_init(struct mlx5_cache_list *list, const char *name,
31                      uint32_t entry_size, void *ctx,
32                      mlx5_cache_create_cb cb_create,
33                      mlx5_cache_match_cb cb_match,
34                      mlx5_cache_remove_cb cb_remove)
35 {
36         MLX5_ASSERT(list);
37         if (!cb_match || (!cb_create ^ !cb_remove))
38                 return -1;
39         if (name)
40                 snprintf(list->name, sizeof(list->name), "%s", name);
41         list->entry_sz = entry_size;
42         list->ctx = ctx;
43         list->cb_create = cb_create ? cb_create : mlx5_clist_default_create_cb;
44         list->cb_match = cb_match;
45         list->cb_remove = cb_remove ? cb_remove : mlx5_clist_default_remove_cb;
46         rte_rwlock_init(&list->lock);
47         DRV_LOG(DEBUG, "Cache list %s initialized.", list->name);
48         LIST_INIT(&list->head);
49         return 0;
50 }
51
52 static struct mlx5_cache_entry *
53 __cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse)
54 {
55         struct mlx5_cache_entry *entry;
56
57         LIST_FOREACH(entry, &list->head, next) {
58                 if (list->cb_match(list, entry, ctx))
59                         continue;
60                 if (reuse) {
61                         __atomic_add_fetch(&entry->ref_cnt, 1,
62                                            __ATOMIC_RELAXED);
63                         DRV_LOG(DEBUG, "Cache list %s entry %p ref++: %u.",
64                                 list->name, (void *)entry, entry->ref_cnt);
65                 }
66                 break;
67         }
68         return entry;
69 }
70
71 static struct mlx5_cache_entry *
72 cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse)
73 {
74         struct mlx5_cache_entry *entry;
75
76         rte_rwlock_read_lock(&list->lock);
77         entry = __cache_lookup(list, ctx, reuse);
78         rte_rwlock_read_unlock(&list->lock);
79         return entry;
80 }
81
/**
 * Look an entry up without taking a reference on it.
 *
 * @param list
 *   Cache list to search.
 * @param ctx
 *   Opaque pointer forwarded to the match callback.
 *
 * @return
 *   Matching entry, NULL if none was found.
 */
struct mlx5_cache_entry *
mlx5_cache_lookup(struct mlx5_cache_list *list, void *ctx)
{
        const bool take_ref = false;

        return cache_lookup(list, ctx, take_ref);
}
87
88 struct mlx5_cache_entry *
89 mlx5_cache_register(struct mlx5_cache_list *list, void *ctx)
90 {
91         struct mlx5_cache_entry *entry;
92         uint32_t prev_gen_cnt = 0;
93
94         MLX5_ASSERT(list);
95         prev_gen_cnt = __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE);
96         /* Lookup with read lock, reuse if found. */
97         entry = cache_lookup(list, ctx, true);
98         if (entry)
99                 return entry;
100         /* Not found, append with write lock - block read from other threads. */
101         rte_rwlock_write_lock(&list->lock);
102         /* If list changed by other threads before lock, search again. */
103         if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
104                 /* Lookup and reuse w/o read lock. */
105                 entry = __cache_lookup(list, ctx, true);
106                 if (entry)
107                         goto done;
108         }
109         entry = list->cb_create(list, entry, ctx);
110         if (!entry) {
111                 DRV_LOG(ERR, "Failed to init cache list %s entry %p.",
112                         list->name, (void *)entry);
113                 goto done;
114         }
115         entry->ref_cnt = 1;
116         LIST_INSERT_HEAD(&list->head, entry, next);
117         __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
118         __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
119         DRV_LOG(DEBUG, "Cache list %s entry %p new: %u.",
120                 list->name, (void *)entry, entry->ref_cnt);
121 done:
122         rte_rwlock_write_unlock(&list->lock);
123         return entry;
124 }
125
126 int
127 mlx5_cache_unregister(struct mlx5_cache_list *list,
128                       struct mlx5_cache_entry *entry)
129 {
130         rte_rwlock_write_lock(&list->lock);
131         MLX5_ASSERT(entry && entry->next.le_prev);
132         DRV_LOG(DEBUG, "Cache list %s entry %p ref--: %u.",
133                 list->name, (void *)entry, entry->ref_cnt);
134         if (--entry->ref_cnt) {
135                 rte_rwlock_write_unlock(&list->lock);
136                 return 1;
137         }
138         __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
139         __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
140         LIST_REMOVE(entry, next);
141         list->cb_remove(list, entry);
142         rte_rwlock_write_unlock(&list->lock);
143         DRV_LOG(DEBUG, "Cache list %s entry %p removed.",
144                 list->name, (void *)entry);
145         return 0;
146 }
147
148 void
149 mlx5_cache_list_destroy(struct mlx5_cache_list *list)
150 {
151         struct mlx5_cache_entry *entry;
152
153         MLX5_ASSERT(list);
154         /* no LIST_FOREACH_SAFE, using while instead */
155         while (!LIST_EMPTY(&list->head)) {
156                 entry = LIST_FIRST(&list->head);
157                 LIST_REMOVE(entry, next);
158                 list->cb_remove(list, entry);
159                 DRV_LOG(DEBUG, "Cache list %s entry %p destroyed.",
160                         list->name, (void *)entry);
161         }
162         memset(list, 0, sizeof(*list));
163 }
164
165 uint32_t
166 mlx5_cache_list_get_entry_num(struct mlx5_cache_list *list)
167 {
168         MLX5_ASSERT(list);
169         return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
170 }
171
172 /********************* Indexed pool **********************/
173
174 static inline void
175 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
176 {
177         if (pool->cfg.need_lock)
178                 rte_spinlock_lock(&pool->rsz_lock);
179 }
180
181 static inline void
182 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
183 {
184         if (pool->cfg.need_lock)
185                 rte_spinlock_unlock(&pool->rsz_lock);
186 }
187
188 static inline uint32_t
189 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
190 {
191         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
192         uint32_t trunk_idx = 0;
193         uint32_t i;
194
195         if (!cfg->grow_trunk)
196                 return entry_idx / cfg->trunk_size;
197         if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
198                 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
199                             (cfg->trunk_size << (cfg->grow_shift *
200                             cfg->grow_trunk)) + cfg->grow_trunk;
201         } else {
202                 for (i = 0; i < cfg->grow_trunk; i++) {
203                         if (entry_idx < pool->grow_tbl[i])
204                                 break;
205                 }
206                 trunk_idx = i;
207         }
208         return trunk_idx;
209 }
210
211 static inline uint32_t
212 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
213 {
214         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
215
216         return cfg->trunk_size << (cfg->grow_shift *
217                (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
218 }
219
220 static inline uint32_t
221 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
222 {
223         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
224         uint32_t offset = 0;
225
226         if (!trunk_idx)
227                 return 0;
228         if (!cfg->grow_trunk)
229                 return cfg->trunk_size * trunk_idx;
230         if (trunk_idx < cfg->grow_trunk)
231                 offset = pool->grow_tbl[trunk_idx - 1];
232         else
233                 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
234                          (cfg->trunk_size << (cfg->grow_shift *
235                          cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
236         return offset;
237 }
238
239 struct mlx5_indexed_pool *
240 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
241 {
242         struct mlx5_indexed_pool *pool;
243         uint32_t i;
244
245         if (!cfg || (!cfg->malloc ^ !cfg->free) ||
246             (cfg->per_core_cache && cfg->release_mem_en) ||
247             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
248             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
249                 return NULL;
250         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
251                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
252                            SOCKET_ID_ANY);
253         if (!pool)
254                 return NULL;
255         pool->cfg = *cfg;
256         if (!pool->cfg.trunk_size)
257                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
258         if (!cfg->malloc && !cfg->free) {
259                 pool->cfg.malloc = mlx5_malloc;
260                 pool->cfg.free = mlx5_free;
261         }
262         if (pool->cfg.need_lock)
263                 rte_spinlock_init(&pool->rsz_lock);
264         /*
265          * Initialize the dynamic grow trunk size lookup table to have a quick
266          * lookup for the trunk entry index offset.
267          */
268         for (i = 0; i < cfg->grow_trunk; i++) {
269                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
270                 if (i > 0)
271                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
272         }
273         if (!pool->cfg.max_idx)
274                 pool->cfg.max_idx =
275                         mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
276         if (!cfg->per_core_cache)
277                 pool->free_list = TRUNK_INVALID;
278         return pool;
279 }
280
281 static int
282 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
283 {
284         struct mlx5_indexed_trunk *trunk;
285         struct mlx5_indexed_trunk **trunk_tmp;
286         struct mlx5_indexed_trunk **p;
287         size_t trunk_size = 0;
288         size_t data_size;
289         size_t bmp_size;
290         uint32_t idx, cur_max_idx, i;
291
292         cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
293         if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
294             cur_max_idx >= pool->cfg.max_idx)
295                 return -ENOMEM;
296         if (pool->n_trunk_valid == pool->n_trunk) {
297                 /* No free trunk flags, expand trunk list. */
298                 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
299                              RTE_CACHE_LINE_SIZE / sizeof(void *);
300
301                 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
302                                      sizeof(struct mlx5_indexed_trunk *),
303                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
304                 if (!p)
305                         return -ENOMEM;
306                 if (pool->trunks)
307                         memcpy(p, pool->trunks, pool->n_trunk_valid *
308                                sizeof(struct mlx5_indexed_trunk *));
309                 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
310                        n_grow * sizeof(void *));
311                 trunk_tmp = pool->trunks;
312                 pool->trunks = p;
313                 if (trunk_tmp)
314                         pool->cfg.free(trunk_tmp);
315                 pool->n_trunk += n_grow;
316         }
317         if (!pool->cfg.release_mem_en) {
318                 idx = pool->n_trunk_valid;
319         } else {
320                 /* Find the first available slot in trunk list */
321                 for (idx = 0; idx < pool->n_trunk; idx++)
322                         if (pool->trunks[idx] == NULL)
323                                 break;
324         }
325         trunk_size += sizeof(*trunk);
326         data_size = mlx5_trunk_size_get(pool, idx);
327         bmp_size = rte_bitmap_get_memory_footprint(data_size);
328         /* rte_bitmap requires memory cacheline aligned. */
329         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
330         trunk_size += bmp_size;
331         trunk = pool->cfg.malloc(0, trunk_size,
332                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
333         if (!trunk)
334                 return -ENOMEM;
335         pool->trunks[idx] = trunk;
336         trunk->idx = idx;
337         trunk->free = data_size;
338         trunk->prev = TRUNK_INVALID;
339         trunk->next = TRUNK_INVALID;
340         MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
341         pool->free_list = idx;
342         /* Mark all entries as available. */
343         trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
344                      [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
345                      bmp_size);
346         /* Clear the overhead bits in the trunk if it happens. */
347         if (cur_max_idx + data_size > pool->cfg.max_idx) {
348                 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
349                         rte_bitmap_clear(trunk->bmp, i);
350         }
351         MLX5_ASSERT(trunk->bmp);
352         pool->n_trunk_valid++;
353 #ifdef POOL_DEBUG
354         pool->trunk_new++;
355         pool->trunk_avail++;
356 #endif
357         return 0;
358 }
359
360 static inline struct mlx5_indexed_cache *
361 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
362 {
363         struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
364
365         lc = pool->cache[cidx]->lc;
366         gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
367         if (gc && lc != gc) {
368                 mlx5_ipool_lock(pool);
369                 if (lc && !(--lc->ref_cnt))
370                         olc = lc;
371                 lc = pool->gc;
372                 lc->ref_cnt++;
373                 pool->cache[cidx]->lc = lc;
374                 mlx5_ipool_unlock(pool);
375                 if (olc)
376                         pool->cfg.free(olc);
377         }
378         return lc;
379 }
380
/**
 * Refill the per-lcore index cache of lcore cidx and return one index.
 *
 * Indexes are first taken from the global cache (pool->gc). When it holds
 * none, a new trunk is allocated: the first half of its indexes goes to
 * the global cache, the rest to the lcore cache, and the last index of the
 * trunk is returned. The trunk pointer array lives in the global cache
 * object; when it is full a larger global cache object is allocated and
 * published as pool->gc.
 *
 * Allocations are done outside the pool lock, so the pool state is
 * re-validated after locking again and the whole procedure restarts when
 * it changed in the meantime.
 *
 * @param pool
 *   Indexed pool; pool->cache[cidx] must already be allocated.
 * @param cidx
 *   Lcore index.
 *
 * @return
 *   A non-zero (one-based) index on success, 0 when the pool reached its
 *   limit or an allocation failed.
 */
static uint32_t
mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
{
        struct mlx5_indexed_trunk *trunk;
        struct mlx5_indexed_cache *p, *lc, *olc = NULL;
        size_t trunk_size = 0;
        size_t data_size;
        uint32_t cur_max_idx, trunk_idx, trunk_n;
        uint32_t fetch_size, ts_idx, i;
        int n_grow;

check_again:
        p = NULL;
        fetch_size = 0;
        /*
         * Fetch new indexes from the global cache if possible. On the
         * first round the local cache pointer will be NULL.
         */
        lc = pool->cache[cidx]->lc;
        mlx5_ipool_lock(pool);
        /* Try to update local cache first. */
        if (likely(pool->gc)) {
                if (lc != pool->gc) {
                        /* Swap the reference from the old to the new gc. */
                        if (lc && !(--lc->ref_cnt))
                                olc = lc;
                        lc = pool->gc;
                        lc->ref_cnt++;
                        pool->cache[cidx]->lc = lc;
                }
                if (lc->len) {
                        /*
                         * Use the updated local cache to fetch indexes:
                         * at most a quarter of per_core_cache at a time.
                         * NOTE(review): fetch_size is 0 when
                         * per_core_cache < 4 - confirm such configurations
                         * are never used with this path.
                         */
                        fetch_size = pool->cfg.per_core_cache >> 2;
                        if (lc->len < fetch_size)
                                fetch_size = lc->len;
                        lc->len -= fetch_size;
                        memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
                               sizeof(uint32_t) * fetch_size);
                }
        }
        mlx5_ipool_unlock(pool);
        /* Free the unreferenced old global cache outside the lock. */
        if (unlikely(olc)) {
                pool->cfg.free(olc);
                olc = NULL;
        }
        if (fetch_size) {
                /* Hand out the last fetched index, keep the others. */
                pool->cache[cidx]->len = fetch_size - 1;
                return pool->cache[cidx]->idx[pool->cache[cidx]->len];
        }
        trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
                         __ATOMIC_ACQUIRE) : 0;
        trunk_n = lc ? lc->n_trunk : 0;
        cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
        /* Check whether the index range reached its maximum. */
        if (trunk_idx == TRUNK_MAX_IDX ||
            cur_max_idx >= pool->cfg.max_idx)
                return 0;
        /* Not enough space in the trunk array, resize the trunks array. */
        if (trunk_idx == trunk_n) {
                n_grow = trunk_idx ? trunk_idx :
                             RTE_CACHE_LINE_SIZE / sizeof(void *);
                cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
                /*
                 * Resize the trunk array. One allocation holds the cache
                 * header, cur_max_idx index slots and the trunk pointers.
                 */
                p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
                        sizeof(struct mlx5_indexed_trunk *)) +
                        (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
                        RTE_CACHE_LINE_SIZE, rte_socket_id());
                if (!p)
                        return 0;
                /* The trunk pointers start right after the index slots. */
                p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
                if (lc)
                        memcpy(p->trunks, lc->trunks, trunk_idx *
                       sizeof(struct mlx5_indexed_trunk *));
#ifdef RTE_LIBRTE_MLX5_DEBUG
                /* Unused trunk slots are zeroed in debug builds only. */
                memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
                        n_grow * sizeof(void *));
#endif
                p->n_trunk_valid = trunk_idx;
                p->n_trunk = trunk_n + n_grow;
                p->len = 0;
        }
        /* Prepare the new trunk. */
        trunk_size = sizeof(*trunk);
        data_size = mlx5_trunk_size_get(pool, trunk_idx);
        trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
        trunk = pool->cfg.malloc(0, trunk_size,
                                 RTE_CACHE_LINE_SIZE, rte_socket_id());
        if (unlikely(!trunk)) {
                /*
                 * NOTE(review): p is NULL here when no resize was needed;
                 * this relies on cfg.free accepting NULL - confirm for
                 * custom callbacks.
                 */
                pool->cfg.free(p);
                return 0;
        }
        trunk->idx = trunk_idx;
        trunk->free = data_size;
        mlx5_ipool_lock(pool);
        /*
         * Double check whether the trunks were updated or indexes became
         * available. While the new trunk was being allocated, indexes may
         * have been flushed to the global cache, so pool->gc->len has to
         * be checked as well.
         */
        if (pool->gc && (lc != pool->gc ||
            lc->n_trunk_valid != trunk_idx ||
            pool->gc->len)) {
                /* State changed: drop the allocations and start over. */
                mlx5_ipool_unlock(pool);
                if (p)
                        pool->cfg.free(p);
                pool->cfg.free(trunk);
                goto check_again;
        }
        /* Resize the trunk array and update local cache first.  */
        if (p) {
                if (lc && !(--lc->ref_cnt))
                        olc = lc;
                lc = p;
                lc->ref_cnt = 1;
                pool->cache[cidx]->lc = lc;
                /* Publish the resized cache as the new global cache. */
                __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
        }
        /* Add trunk to trunks array. */
        lc->trunks[trunk_idx] = trunk;
        __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
        /* Enqueue half of the indexes to the global cache (one-based). */
        ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
        fetch_size = trunk->free >> 1;
        for (i = 0; i < fetch_size; i++)
                lc->idx[i] = ts_idx + i;
        lc->len = fetch_size;
        mlx5_ipool_unlock(pool);
        /* Copy the remaining half minus one to the local index array. */
        pool->cache[cidx]->len = trunk->free - fetch_size - 1;
        ts_idx += fetch_size;
        for (i = 0; i < pool->cache[cidx]->len; i++)
                pool->cache[cidx]->idx[i] = ts_idx + i;
        if (olc)
                pool->cfg.free(olc);
        /* The one index left out above is returned to the caller. */
        return ts_idx + i;
}
516
517 static void *
518 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
519 {
520         struct mlx5_indexed_trunk *trunk;
521         struct mlx5_indexed_cache *lc;
522         uint32_t trunk_idx;
523         uint32_t entry_idx;
524         int cidx;
525
526         MLX5_ASSERT(idx);
527         cidx = rte_lcore_index(rte_lcore_id());
528         if (unlikely(cidx == -1)) {
529                 rte_errno = ENOTSUP;
530                 return NULL;
531         }
532         if (unlikely(!pool->cache[cidx])) {
533                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
534                         sizeof(struct mlx5_ipool_per_lcore) +
535                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
536                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
537                 if (!pool->cache[cidx]) {
538                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
539                         return NULL;
540                 }
541         }
542         lc = mlx5_ipool_update_global_cache(pool, cidx);
543         idx -= 1;
544         trunk_idx = mlx5_trunk_idx_get(pool, idx);
545         trunk = lc->trunks[trunk_idx];
546         MLX5_ASSERT(trunk);
547         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
548         return &trunk->data[entry_idx * pool->cfg.size];
549 }
550
551 static void *
552 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
553 {
554         int cidx;
555
556         cidx = rte_lcore_index(rte_lcore_id());
557         if (unlikely(cidx == -1)) {
558                 rte_errno = ENOTSUP;
559                 return NULL;
560         }
561         if (unlikely(!pool->cache[cidx])) {
562                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
563                         sizeof(struct mlx5_ipool_per_lcore) +
564                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
565                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
566                 if (!pool->cache[cidx]) {
567                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
568                         return NULL;
569                 }
570         } else if (pool->cache[cidx]->len) {
571                 pool->cache[cidx]->len--;
572                 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
573                 return mlx5_ipool_get_cache(pool, *idx);
574         }
575         /* Not enough idx in global cache. Keep fetching from global. */
576         *idx = mlx5_ipool_allocate_from_global(pool, cidx);
577         if (unlikely(!(*idx)))
578                 return NULL;
579         return mlx5_ipool_get_cache(pool, *idx);
580 }
581
582 static void
583 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
584 {
585         int cidx;
586         struct mlx5_ipool_per_lcore *ilc;
587         struct mlx5_indexed_cache *gc, *olc = NULL;
588         uint32_t reclaim_num = 0;
589
590         MLX5_ASSERT(idx);
591         cidx = rte_lcore_index(rte_lcore_id());
592         if (unlikely(cidx == -1)) {
593                 rte_errno = ENOTSUP;
594                 return;
595         }
596         /*
597          * When index was allocated on core A but freed on core B. In this
598          * case check if local cache on core B was allocated before.
599          */
600         if (unlikely(!pool->cache[cidx])) {
601                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
602                         sizeof(struct mlx5_ipool_per_lcore) +
603                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
604                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
605                 if (!pool->cache[cidx]) {
606                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
607                         return;
608                 }
609         }
610         /* Try to enqueue to local index cache. */
611         if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
612                 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
613                 pool->cache[cidx]->len++;
614                 return;
615         }
616         ilc = pool->cache[cidx];
617         reclaim_num = pool->cfg.per_core_cache >> 2;
618         ilc->len -= reclaim_num;
619         /* Local index cache full, try with global index cache. */
620         mlx5_ipool_lock(pool);
621         gc = pool->gc;
622         if (ilc->lc != gc) {
623                 if (!(--ilc->lc->ref_cnt))
624                         olc = ilc->lc;
625                 gc->ref_cnt++;
626                 ilc->lc = gc;
627         }
628         memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
629                reclaim_num * sizeof(uint32_t));
630         gc->len += reclaim_num;
631         mlx5_ipool_unlock(pool);
632         if (olc)
633                 pool->cfg.free(olc);
634         pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
635         pool->cache[cidx]->len++;
636 }
637
638 void *
639 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
640 {
641         struct mlx5_indexed_trunk *trunk;
642         uint64_t slab = 0;
643         uint32_t iidx = 0;
644         void *p;
645
646         if (pool->cfg.per_core_cache)
647                 return mlx5_ipool_malloc_cache(pool, idx);
648         mlx5_ipool_lock(pool);
649         if (pool->free_list == TRUNK_INVALID) {
650                 /* If no available trunks, grow new. */
651                 if (mlx5_ipool_grow(pool)) {
652                         mlx5_ipool_unlock(pool);
653                         return NULL;
654                 }
655         }
656         MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
657         trunk = pool->trunks[pool->free_list];
658         MLX5_ASSERT(trunk->free);
659         if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
660                 mlx5_ipool_unlock(pool);
661                 return NULL;
662         }
663         MLX5_ASSERT(slab);
664         iidx += __builtin_ctzll(slab);
665         MLX5_ASSERT(iidx != UINT32_MAX);
666         MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
667         rte_bitmap_clear(trunk->bmp, iidx);
668         p = &trunk->data[iidx * pool->cfg.size];
669         /*
670          * The ipool index should grow continually from small to big,
671          * some features as metering only accept limited bits of index.
672          * Random index with MSB set may be rejected.
673          */
674         iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
675         iidx += 1; /* non-zero index. */
676         trunk->free--;
677 #ifdef POOL_DEBUG
678         pool->n_entry++;
679 #endif
680         if (!trunk->free) {
681                 /* Full trunk will be removed from free list in imalloc. */
682                 MLX5_ASSERT(pool->free_list == trunk->idx);
683                 pool->free_list = trunk->next;
684                 if (trunk->next != TRUNK_INVALID)
685                         pool->trunks[trunk->next]->prev = TRUNK_INVALID;
686                 trunk->prev = TRUNK_INVALID;
687                 trunk->next = TRUNK_INVALID;
688 #ifdef POOL_DEBUG
689                 pool->trunk_empty++;
690                 pool->trunk_avail--;
691 #endif
692         }
693         *idx = iidx;
694         mlx5_ipool_unlock(pool);
695         return p;
696 }
697
698 void *
699 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
700 {
701         void *entry = mlx5_ipool_malloc(pool, idx);
702
703         if (entry && pool->cfg.size)
704                 memset(entry, 0, pool->cfg.size);
705         return entry;
706 }
707
708 void
709 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
710 {
711         struct mlx5_indexed_trunk *trunk;
712         uint32_t trunk_idx;
713         uint32_t entry_idx;
714
715         if (!idx)
716                 return;
717         if (pool->cfg.per_core_cache) {
718                 mlx5_ipool_free_cache(pool, idx);
719                 return;
720         }
721         idx -= 1;
722         mlx5_ipool_lock(pool);
723         trunk_idx = mlx5_trunk_idx_get(pool, idx);
724         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
725             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
726                 goto out;
727         trunk = pool->trunks[trunk_idx];
728         if (!trunk)
729                 goto out;
730         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
731         if (trunk_idx != trunk->idx ||
732             rte_bitmap_get(trunk->bmp, entry_idx))
733                 goto out;
734         rte_bitmap_set(trunk->bmp, entry_idx);
735         trunk->free++;
736         if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
737            (pool, trunk->idx)) {
738                 if (pool->free_list == trunk->idx)
739                         pool->free_list = trunk->next;
740                 if (trunk->next != TRUNK_INVALID)
741                         pool->trunks[trunk->next]->prev = trunk->prev;
742                 if (trunk->prev != TRUNK_INVALID)
743                         pool->trunks[trunk->prev]->next = trunk->next;
744                 pool->cfg.free(trunk);
745                 pool->trunks[trunk_idx] = NULL;
746                 pool->n_trunk_valid--;
747 #ifdef POOL_DEBUG
748                 pool->trunk_avail--;
749                 pool->trunk_free++;
750 #endif
751                 if (pool->n_trunk_valid == 0) {
752                         pool->cfg.free(pool->trunks);
753                         pool->trunks = NULL;
754                         pool->n_trunk = 0;
755                 }
756         } else if (trunk->free == 1) {
757                 /* Put into free trunk list head. */
758                 MLX5_ASSERT(pool->free_list != trunk->idx);
759                 trunk->next = pool->free_list;
760                 trunk->prev = TRUNK_INVALID;
761                 if (pool->free_list != TRUNK_INVALID)
762                         pool->trunks[pool->free_list]->prev = trunk->idx;
763                 pool->free_list = trunk->idx;
764 #ifdef POOL_DEBUG
765                 pool->trunk_empty--;
766                 pool->trunk_avail++;
767 #endif
768         }
769 #ifdef POOL_DEBUG
770         pool->n_entry--;
771 #endif
772 out:
773         mlx5_ipool_unlock(pool);
774 }
775
776 void *
777 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
778 {
779         struct mlx5_indexed_trunk *trunk;
780         void *p = NULL;
781         uint32_t trunk_idx;
782         uint32_t entry_idx;
783
784         if (!idx)
785                 return NULL;
786         if (pool->cfg.per_core_cache)
787                 return mlx5_ipool_get_cache(pool, idx);
788         idx -= 1;
789         mlx5_ipool_lock(pool);
790         trunk_idx = mlx5_trunk_idx_get(pool, idx);
791         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
792             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
793                 goto out;
794         trunk = pool->trunks[trunk_idx];
795         if (!trunk)
796                 goto out;
797         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
798         if (trunk_idx != trunk->idx ||
799             rte_bitmap_get(trunk->bmp, entry_idx))
800                 goto out;
801         p = &trunk->data[entry_idx * pool->cfg.size];
802 out:
803         mlx5_ipool_unlock(pool);
804         return p;
805 }
806
807 int
808 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
809 {
810         struct mlx5_indexed_trunk **trunks = NULL;
811         struct mlx5_indexed_cache *gc = pool->gc;
812         uint32_t i, n_trunk_valid = 0;
813
814         MLX5_ASSERT(pool);
815         mlx5_ipool_lock(pool);
816         if (pool->cfg.per_core_cache) {
817                 for (i = 0; i < RTE_MAX_LCORE; i++) {
818                         /*
819                          * Free only old global cache. Pool gc will be
820                          * freed at last.
821                          */
822                         if (pool->cache[i]) {
823                                 if (pool->cache[i]->lc &&
824                                     pool->cache[i]->lc != pool->gc &&
825                                     (!(--pool->cache[i]->lc->ref_cnt)))
826                                         pool->cfg.free(pool->cache[i]->lc);
827                                 pool->cfg.free(pool->cache[i]);
828                         }
829                 }
830                 if (gc) {
831                         trunks = gc->trunks;
832                         n_trunk_valid = gc->n_trunk_valid;
833                 }
834         } else {
835                 gc = NULL;
836                 trunks = pool->trunks;
837                 n_trunk_valid = pool->n_trunk_valid;
838         }
839         for (i = 0; i < n_trunk_valid; i++) {
840                 if (trunks[i])
841                         pool->cfg.free(trunks[i]);
842         }
843         if (!gc && trunks)
844                 pool->cfg.free(trunks);
845         if (gc)
846                 pool->cfg.free(gc);
847         mlx5_ipool_unlock(pool);
848         mlx5_free(pool);
849         return 0;
850 }
851
852 void
853 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
854 {
855         uint32_t i, j;
856         struct mlx5_indexed_cache *gc;
857         struct rte_bitmap *ibmp;
858         uint32_t bmp_num, mem_size;
859
860         if (!pool->cfg.per_core_cache)
861                 return;
862         gc = pool->gc;
863         if (!gc)
864                 return;
865         /* Reset bmp. */
866         bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
867         mem_size = rte_bitmap_get_memory_footprint(bmp_num);
868         pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
869                                          RTE_CACHE_LINE_SIZE, rte_socket_id());
870         if (!pool->bmp_mem) {
871                 DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
872                 return;
873         }
874         ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
875         if (!ibmp) {
876                 pool->cfg.free(pool->bmp_mem);
877                 pool->bmp_mem = NULL;
878                 DRV_LOG(ERR, "Ipool bitmap create failed.\n");
879                 return;
880         }
881         pool->ibmp = ibmp;
882         /* Clear global cache. */
883         for (i = 0; i < gc->len; i++)
884                 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
885         /* Clear core cache. */
886         for (i = 0; i < RTE_MAX_LCORE; i++) {
887                 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
888
889                 if (!ilc)
890                         continue;
891                 for (j = 0; j < ilc->len; j++)
892                         rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
893         }
894 }
895
896 static void *
897 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
898 {
899         struct rte_bitmap *ibmp;
900         uint64_t slab = 0;
901         uint32_t iidx = *pos;
902
903         ibmp = pool->ibmp;
904         if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
905                 if (pool->bmp_mem) {
906                         pool->cfg.free(pool->bmp_mem);
907                         pool->bmp_mem = NULL;
908                         pool->ibmp = NULL;
909                 }
910                 return NULL;
911         }
912         iidx += __builtin_ctzll(slab);
913         rte_bitmap_clear(ibmp, iidx);
914         iidx++;
915         *pos = iidx;
916         return mlx5_ipool_get_cache(pool, iidx);
917 }
918
919 void *
920 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
921 {
922         uint32_t idx = *pos;
923         void *entry;
924
925         if (pool->cfg.per_core_cache)
926                 return mlx5_ipool_get_next_cache(pool, pos);
927         while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
928                 entry = mlx5_ipool_get(pool, idx);
929                 if (entry) {
930                         *pos = idx;
931                         return entry;
932                 }
933                 idx++;
934         }
935         return NULL;
936 }
937
938 void
939 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
940 {
941         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
942                "total: %d\n",
943                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
944                pool->cfg.trunk_size, pool->n_trunk_valid);
945 #ifdef POOL_DEBUG
946         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
947                "available %u free %u\n",
948                pool->cfg.type, pool->n_entry, pool->trunk_new,
949                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
950 #endif
951 }
952
953 struct mlx5_l3t_tbl *
954 mlx5_l3t_create(enum mlx5_l3t_type type)
955 {
956         struct mlx5_l3t_tbl *tbl;
957         struct mlx5_indexed_pool_config l3t_ip_cfg = {
958                 .trunk_size = 16,
959                 .grow_trunk = 6,
960                 .grow_shift = 1,
961                 .need_lock = 0,
962                 .release_mem_en = 1,
963                 .malloc = mlx5_malloc,
964                 .free = mlx5_free,
965         };
966
967         if (type >= MLX5_L3T_TYPE_MAX) {
968                 rte_errno = EINVAL;
969                 return NULL;
970         }
971         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
972                           SOCKET_ID_ANY);
973         if (!tbl) {
974                 rte_errno = ENOMEM;
975                 return NULL;
976         }
977         tbl->type = type;
978         switch (type) {
979         case MLX5_L3T_TYPE_WORD:
980                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
981                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
982                 break;
983         case MLX5_L3T_TYPE_DWORD:
984                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
985                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
986                 break;
987         case MLX5_L3T_TYPE_QWORD:
988                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
989                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
990                 break;
991         default:
992                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
993                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
994                 break;
995         }
996         rte_spinlock_init(&tbl->sl);
997         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
998         if (!tbl->eip) {
999                 rte_errno = ENOMEM;
1000                 mlx5_free(tbl);
1001                 tbl = NULL;
1002         }
1003         return tbl;
1004 }
1005
1006 void
1007 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1008 {
1009         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1010         uint32_t i, j;
1011
1012         if (!tbl)
1013                 return;
1014         g_tbl = tbl->tbl;
1015         if (g_tbl) {
1016                 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1017                         m_tbl = g_tbl->tbl[i];
1018                         if (!m_tbl)
1019                                 continue;
1020                         for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1021                                 if (!m_tbl->tbl[j])
1022                                         continue;
1023                                 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1024                                             m_tbl->tbl[j])->ref_cnt);
1025                                 mlx5_ipool_free(tbl->eip,
1026                                                 ((struct mlx5_l3t_entry_word *)
1027                                                 m_tbl->tbl[j])->idx);
1028                                 m_tbl->tbl[j] = 0;
1029                                 if (!(--m_tbl->ref_cnt))
1030                                         break;
1031                         }
1032                         MLX5_ASSERT(!m_tbl->ref_cnt);
1033                         mlx5_free(g_tbl->tbl[i]);
1034                         g_tbl->tbl[i] = 0;
1035                         if (!(--g_tbl->ref_cnt))
1036                                 break;
1037                 }
1038                 MLX5_ASSERT(!g_tbl->ref_cnt);
1039                 mlx5_free(tbl->tbl);
1040                 tbl->tbl = 0;
1041         }
1042         mlx5_ipool_destroy(tbl->eip);
1043         mlx5_free(tbl);
1044 }
1045
1046 static int32_t
1047 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1048                 union mlx5_l3t_data *data)
1049 {
1050         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1051         struct mlx5_l3t_entry_word *w_e_tbl;
1052         struct mlx5_l3t_entry_dword *dw_e_tbl;
1053         struct mlx5_l3t_entry_qword *qw_e_tbl;
1054         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1055         void *e_tbl;
1056         uint32_t entry_idx;
1057
1058         g_tbl = tbl->tbl;
1059         if (!g_tbl)
1060                 return -1;
1061         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1062         if (!m_tbl)
1063                 return -1;
1064         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1065         if (!e_tbl)
1066                 return -1;
1067         entry_idx = idx & MLX5_L3T_ET_MASK;
1068         switch (tbl->type) {
1069         case MLX5_L3T_TYPE_WORD:
1070                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1071                 data->word = w_e_tbl->entry[entry_idx].data;
1072                 if (w_e_tbl->entry[entry_idx].data)
1073                         w_e_tbl->entry[entry_idx].ref_cnt++;
1074                 break;
1075         case MLX5_L3T_TYPE_DWORD:
1076                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1077                 data->dword = dw_e_tbl->entry[entry_idx].data;
1078                 if (dw_e_tbl->entry[entry_idx].data)
1079                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1080                 break;
1081         case MLX5_L3T_TYPE_QWORD:
1082                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1083                 data->qword = qw_e_tbl->entry[entry_idx].data;
1084                 if (qw_e_tbl->entry[entry_idx].data)
1085                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1086                 break;
1087         default:
1088                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1089                 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1090                 if (ptr_e_tbl->entry[entry_idx].data)
1091                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1092                 break;
1093         }
1094         return 0;
1095 }
1096
1097 int32_t
1098 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1099                    union mlx5_l3t_data *data)
1100 {
1101         int ret;
1102
1103         rte_spinlock_lock(&tbl->sl);
1104         ret = __l3t_get_entry(tbl, idx, data);
1105         rte_spinlock_unlock(&tbl->sl);
1106         return ret;
1107 }
1108
1109 int32_t
1110 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1111 {
1112         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1113         struct mlx5_l3t_entry_word *w_e_tbl;
1114         struct mlx5_l3t_entry_dword *dw_e_tbl;
1115         struct mlx5_l3t_entry_qword *qw_e_tbl;
1116         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1117         void *e_tbl;
1118         uint32_t entry_idx;
1119         uint64_t ref_cnt;
1120         int32_t ret = -1;
1121
1122         rte_spinlock_lock(&tbl->sl);
1123         g_tbl = tbl->tbl;
1124         if (!g_tbl)
1125                 goto out;
1126         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1127         if (!m_tbl)
1128                 goto out;
1129         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1130         if (!e_tbl)
1131                 goto out;
1132         entry_idx = idx & MLX5_L3T_ET_MASK;
1133         switch (tbl->type) {
1134         case MLX5_L3T_TYPE_WORD:
1135                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1136                 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1137                 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1138                 if (ret)
1139                         goto out;
1140                 w_e_tbl->entry[entry_idx].data = 0;
1141                 ref_cnt = --w_e_tbl->ref_cnt;
1142                 break;
1143         case MLX5_L3T_TYPE_DWORD:
1144                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1145                 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1146                 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1147                 if (ret)
1148                         goto out;
1149                 dw_e_tbl->entry[entry_idx].data = 0;
1150                 ref_cnt = --dw_e_tbl->ref_cnt;
1151                 break;
1152         case MLX5_L3T_TYPE_QWORD:
1153                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1154                 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1155                 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1156                 if (ret)
1157                         goto out;
1158                 qw_e_tbl->entry[entry_idx].data = 0;
1159                 ref_cnt = --qw_e_tbl->ref_cnt;
1160                 break;
1161         default:
1162                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1163                 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1164                 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1165                 if (ret)
1166                         goto out;
1167                 ptr_e_tbl->entry[entry_idx].data = NULL;
1168                 ref_cnt = --ptr_e_tbl->ref_cnt;
1169                 break;
1170         }
1171         if (!ref_cnt) {
1172                 mlx5_ipool_free(tbl->eip,
1173                                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1174                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1175                                                                         NULL;
1176                 if (!(--m_tbl->ref_cnt)) {
1177                         mlx5_free(m_tbl);
1178                         g_tbl->tbl
1179                         [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1180                         if (!(--g_tbl->ref_cnt)) {
1181                                 mlx5_free(g_tbl);
1182                                 tbl->tbl = 0;
1183                         }
1184                 }
1185         }
1186 out:
1187         rte_spinlock_unlock(&tbl->sl);
1188         return ret;
1189 }
1190
1191 static int32_t
1192 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1193                 union mlx5_l3t_data *data)
1194 {
1195         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1196         struct mlx5_l3t_entry_word *w_e_tbl;
1197         struct mlx5_l3t_entry_dword *dw_e_tbl;
1198         struct mlx5_l3t_entry_qword *qw_e_tbl;
1199         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1200         void *e_tbl;
1201         uint32_t entry_idx, tbl_idx = 0;
1202
1203         /* Check the global table, create it if empty. */
1204         g_tbl = tbl->tbl;
1205         if (!g_tbl) {
1206                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1207                                     sizeof(struct mlx5_l3t_level_tbl) +
1208                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1209                                     SOCKET_ID_ANY);
1210                 if (!g_tbl) {
1211                         rte_errno = ENOMEM;
1212                         return -1;
1213                 }
1214                 tbl->tbl = g_tbl;
1215         }
1216         /*
1217          * Check the middle table, create it if empty. Ref_cnt will be
1218          * increased if new sub table created.
1219          */
1220         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1221         if (!m_tbl) {
1222                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1223                                     sizeof(struct mlx5_l3t_level_tbl) +
1224                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1225                                     SOCKET_ID_ANY);
1226                 if (!m_tbl) {
1227                         rte_errno = ENOMEM;
1228                         return -1;
1229                 }
1230                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1231                                                                         m_tbl;
1232                 g_tbl->ref_cnt++;
1233         }
1234         /*
1235          * Check the entry table, create it if empty. Ref_cnt will be
1236          * increased if new sub entry table created.
1237          */
1238         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1239         if (!e_tbl) {
1240                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1241                 if (!e_tbl) {
1242                         rte_errno = ENOMEM;
1243                         return -1;
1244                 }
1245                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1246                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1247                                                                         e_tbl;
1248                 m_tbl->ref_cnt++;
1249         }
1250         entry_idx = idx & MLX5_L3T_ET_MASK;
1251         switch (tbl->type) {
1252         case MLX5_L3T_TYPE_WORD:
1253                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1254                 if (w_e_tbl->entry[entry_idx].data) {
1255                         data->word = w_e_tbl->entry[entry_idx].data;
1256                         w_e_tbl->entry[entry_idx].ref_cnt++;
1257                         rte_errno = EEXIST;
1258                         return -1;
1259                 }
1260                 w_e_tbl->entry[entry_idx].data = data->word;
1261                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1262                 w_e_tbl->ref_cnt++;
1263                 break;
1264         case MLX5_L3T_TYPE_DWORD:
1265                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1266                 if (dw_e_tbl->entry[entry_idx].data) {
1267                         data->dword = dw_e_tbl->entry[entry_idx].data;
1268                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1269                         rte_errno = EEXIST;
1270                         return -1;
1271                 }
1272                 dw_e_tbl->entry[entry_idx].data = data->dword;
1273                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1274                 dw_e_tbl->ref_cnt++;
1275                 break;
1276         case MLX5_L3T_TYPE_QWORD:
1277                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1278                 if (qw_e_tbl->entry[entry_idx].data) {
1279                         data->qword = qw_e_tbl->entry[entry_idx].data;
1280                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1281                         rte_errno = EEXIST;
1282                         return -1;
1283                 }
1284                 qw_e_tbl->entry[entry_idx].data = data->qword;
1285                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1286                 qw_e_tbl->ref_cnt++;
1287                 break;
1288         default:
1289                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1290                 if (ptr_e_tbl->entry[entry_idx].data) {
1291                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
1292                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1293                         rte_errno = EEXIST;
1294                         return -1;
1295                 }
1296                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1297                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1298                 ptr_e_tbl->ref_cnt++;
1299                 break;
1300         }
1301         return 0;
1302 }
1303
1304 int32_t
1305 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1306                    union mlx5_l3t_data *data)
1307 {
1308         int ret;
1309
1310         rte_spinlock_lock(&tbl->sl);
1311         ret = __l3t_set_entry(tbl, idx, data);
1312         rte_spinlock_unlock(&tbl->sl);
1313         return ret;
1314 }
1315
1316 int32_t
1317 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1318                        union mlx5_l3t_data *data,
1319                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
1320 {
1321         int32_t ret;
1322
1323         rte_spinlock_lock(&tbl->sl);
1324         /* Check if entry data is ready. */
1325         ret = __l3t_get_entry(tbl, idx, data);
1326         if (!ret) {
1327                 switch (tbl->type) {
1328                 case MLX5_L3T_TYPE_WORD:
1329                         if (data->word)
1330                                 goto out;
1331                         break;
1332                 case MLX5_L3T_TYPE_DWORD:
1333                         if (data->dword)
1334                                 goto out;
1335                         break;
1336                 case MLX5_L3T_TYPE_QWORD:
1337                         if (data->qword)
1338                                 goto out;
1339                         break;
1340                 default:
1341                         if (data->ptr)
1342                                 goto out;
1343                         break;
1344                 }
1345         }
1346         /* Entry data is not ready, use user callback to create it. */
1347         ret = cb(ctx, data);
1348         if (ret)
1349                 goto out;
1350         /* Save the new allocated data to entry. */
1351         ret = __l3t_set_entry(tbl, idx, data);
1352 out:
1353         rte_spinlock_unlock(&tbl->sl);
1354         return ret;
1355 }