c4c9adb0391b8fa4b8a08298b53abc9206e7c8b4
[dpdk.git] / drivers / net / mlx5 / mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6
7 #include <mlx5_malloc.h>
8
9 #include "mlx5_utils.h"
10
11
12 /********************* mlx5 list ************************/
13
14 int
15 mlx5_list_create(struct mlx5_list *list, const char *name, void *ctx,
16                  mlx5_list_create_cb cb_create,
17                  mlx5_list_match_cb cb_match,
18                  mlx5_list_remove_cb cb_remove,
19                  mlx5_list_clone_cb cb_clone,
20                  mlx5_list_clone_free_cb cb_clone_free)
21 {
22         int i;
23
24         MLX5_ASSERT(list);
25         if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
26             !cb_clone_free)
27                 return -1;
28         if (name)
29                 snprintf(list->name, sizeof(list->name), "%s", name);
30         list->ctx = ctx;
31         list->cb_create = cb_create;
32         list->cb_match = cb_match;
33         list->cb_remove = cb_remove;
34         list->cb_clone = cb_clone;
35         list->cb_clone_free = cb_clone_free;
36         rte_rwlock_init(&list->lock);
37         DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
38         for (i = 0; i <= RTE_MAX_LCORE; i++)
39                 LIST_INIT(&list->cache[i].h);
40         return 0;
41 }
42
43 static struct mlx5_list_entry *
44 __list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
45 {
46         struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
47         uint32_t ret;
48
49         while (entry != NULL) {
50                 struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
51
52                 if (list->cb_match(list, entry, ctx)) {
53                         if (lcore_index < RTE_MAX_LCORE) {
54                                 ret = __atomic_load_n(&entry->ref_cnt,
55                                                       __ATOMIC_ACQUIRE);
56                                 if (ret == 0) {
57                                         LIST_REMOVE(entry, next);
58                                         list->cb_clone_free(list, entry);
59                                 }
60                         }
61                         entry = nentry;
62                         continue;
63                 }
64                 if (reuse) {
65                         ret = __atomic_add_fetch(&entry->ref_cnt, 1,
66                                                  __ATOMIC_ACQUIRE);
67                         if (ret == 1u) {
68                                 /* Entry was invalid before, free it. */
69                                 LIST_REMOVE(entry, next);
70                                 list->cb_clone_free(list, entry);
71                                 entry = nentry;
72                                 continue;
73                         }
74                         DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
75                                 list->name, (void *)entry, entry->ref_cnt);
76                 }
77                 break;
78         }
79         return entry;
80 }
81
82 struct mlx5_list_entry *
83 mlx5_list_lookup(struct mlx5_list *list, void *ctx)
84 {
85         struct mlx5_list_entry *entry = NULL;
86         int i;
87
88         rte_rwlock_read_lock(&list->lock);
89         for (i = 0; i < RTE_MAX_LCORE; i++) {
90                 entry = __list_lookup(list, i, ctx, false);
91                 if (entry)
92                         break;
93         }
94         rte_rwlock_read_unlock(&list->lock);
95         return entry;
96 }
97
98 static struct mlx5_list_entry *
99 mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
100                        struct mlx5_list_entry *gentry, void *ctx)
101 {
102         struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
103
104         if (unlikely(!lentry))
105                 return NULL;
106         lentry->ref_cnt = 1u;
107         lentry->gentry = gentry;
108         LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
109         return lentry;
110 }
111
112 struct mlx5_list_entry *
113 mlx5_list_register(struct mlx5_list *list, void *ctx)
114 {
115         struct mlx5_list_entry *entry, *local_entry;
116         volatile uint32_t prev_gen_cnt = 0;
117         int lcore_index = rte_lcore_index(rte_lcore_id());
118
119         MLX5_ASSERT(list);
120         MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
121         if (unlikely(lcore_index == -1)) {
122                 rte_errno = ENOTSUP;
123                 return NULL;
124         }
125         /* 1. Lookup in local cache. */
126         local_entry = __list_lookup(list, lcore_index, ctx, true);
127         if (local_entry)
128                 return local_entry;
129         /* 2. Lookup with read lock on global list, reuse if found. */
130         rte_rwlock_read_lock(&list->lock);
131         entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
132         if (likely(entry)) {
133                 rte_rwlock_read_unlock(&list->lock);
134                 return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
135         }
136         prev_gen_cnt = list->gen_cnt;
137         rte_rwlock_read_unlock(&list->lock);
138         /* 3. Prepare new entry for global list and for cache. */
139         entry = list->cb_create(list, entry, ctx);
140         if (unlikely(!entry))
141                 return NULL;
142         local_entry = list->cb_clone(list, entry, ctx);
143         if (unlikely(!local_entry)) {
144                 list->cb_remove(list, entry);
145                 return NULL;
146         }
147         entry->ref_cnt = 1u;
148         local_entry->ref_cnt = 1u;
149         local_entry->gentry = entry;
150         rte_rwlock_write_lock(&list->lock);
151         /* 4. Make sure the same entry was not created before the write lock. */
152         if (unlikely(prev_gen_cnt != list->gen_cnt)) {
153                 struct mlx5_list_entry *oentry = __list_lookup(list,
154                                                                RTE_MAX_LCORE,
155                                                                ctx, true);
156
157                 if (unlikely(oentry)) {
158                         /* 4.5. Found real race!!, reuse the old entry. */
159                         rte_rwlock_write_unlock(&list->lock);
160                         list->cb_remove(list, entry);
161                         list->cb_clone_free(list, local_entry);
162                         return mlx5_list_cache_insert(list, lcore_index, oentry,
163                                                       ctx);
164                 }
165         }
166         /* 5. Update lists. */
167         LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry, next);
168         list->gen_cnt++;
169         rte_rwlock_write_unlock(&list->lock);
170         LIST_INSERT_HEAD(&list->cache[lcore_index].h, local_entry, next);
171         __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
172         DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
173                 list->name, (void *)entry, entry->ref_cnt);
174         return local_entry;
175 }
176
177 int
178 mlx5_list_unregister(struct mlx5_list *list,
179                       struct mlx5_list_entry *entry)
180 {
181         struct mlx5_list_entry *gentry = entry->gentry;
182
183         if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
184                 return 1;
185         if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
186                 return 1;
187         rte_rwlock_write_lock(&list->lock);
188         if (likely(gentry->ref_cnt == 0)) {
189                 LIST_REMOVE(gentry, next);
190                 rte_rwlock_write_unlock(&list->lock);
191                 list->cb_remove(list, gentry);
192                 __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
193                 DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
194                         list->name, (void *)gentry);
195                 return 0;
196         }
197         rte_rwlock_write_unlock(&list->lock);
198         return 1;
199 }
200
201 void
202 mlx5_list_destroy(struct mlx5_list *list)
203 {
204         struct mlx5_list_entry *entry;
205         int i;
206
207         MLX5_ASSERT(list);
208         for (i = 0; i <= RTE_MAX_LCORE; i++) {
209                 while (!LIST_EMPTY(&list->cache[i].h)) {
210                         entry = LIST_FIRST(&list->cache[i].h);
211                         LIST_REMOVE(entry, next);
212                         if (i == RTE_MAX_LCORE) {
213                                 list->cb_remove(list, entry);
214                                 DRV_LOG(DEBUG, "mlx5 list %s entry %p "
215                                         "destroyed.", list->name,
216                                         (void *)entry);
217                         } else {
218                                 list->cb_clone_free(list, entry);
219                         }
220                 }
221         }
222         memset(list, 0, sizeof(*list));
223 }
224
225 uint32_t
226 mlx5_list_get_entry_num(struct mlx5_list *list)
227 {
228         MLX5_ASSERT(list);
229         return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
230 }
231
232 /********************* Indexed pool **********************/
233
234 static inline void
235 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
236 {
237         if (pool->cfg.need_lock)
238                 rte_spinlock_lock(&pool->rsz_lock);
239 }
240
241 static inline void
242 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
243 {
244         if (pool->cfg.need_lock)
245                 rte_spinlock_unlock(&pool->rsz_lock);
246 }
247
248 static inline uint32_t
249 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
250 {
251         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
252         uint32_t trunk_idx = 0;
253         uint32_t i;
254
255         if (!cfg->grow_trunk)
256                 return entry_idx / cfg->trunk_size;
257         if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
258                 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
259                             (cfg->trunk_size << (cfg->grow_shift *
260                             cfg->grow_trunk)) + cfg->grow_trunk;
261         } else {
262                 for (i = 0; i < cfg->grow_trunk; i++) {
263                         if (entry_idx < pool->grow_tbl[i])
264                                 break;
265                 }
266                 trunk_idx = i;
267         }
268         return trunk_idx;
269 }
270
271 static inline uint32_t
272 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
273 {
274         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
275
276         return cfg->trunk_size << (cfg->grow_shift *
277                (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
278 }
279
280 static inline uint32_t
281 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
282 {
283         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
284         uint32_t offset = 0;
285
286         if (!trunk_idx)
287                 return 0;
288         if (!cfg->grow_trunk)
289                 return cfg->trunk_size * trunk_idx;
290         if (trunk_idx < cfg->grow_trunk)
291                 offset = pool->grow_tbl[trunk_idx - 1];
292         else
293                 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
294                          (cfg->trunk_size << (cfg->grow_shift *
295                          cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
296         return offset;
297 }
298
299 struct mlx5_indexed_pool *
300 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
301 {
302         struct mlx5_indexed_pool *pool;
303         uint32_t i;
304
305         if (!cfg || (!cfg->malloc ^ !cfg->free) ||
306             (cfg->per_core_cache && cfg->release_mem_en) ||
307             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
308             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
309                 return NULL;
310         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
311                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
312                            SOCKET_ID_ANY);
313         if (!pool)
314                 return NULL;
315         pool->cfg = *cfg;
316         if (!pool->cfg.trunk_size)
317                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
318         if (!cfg->malloc && !cfg->free) {
319                 pool->cfg.malloc = mlx5_malloc;
320                 pool->cfg.free = mlx5_free;
321         }
322         if (pool->cfg.need_lock)
323                 rte_spinlock_init(&pool->rsz_lock);
324         /*
325          * Initialize the dynamic grow trunk size lookup table to have a quick
326          * lookup for the trunk entry index offset.
327          */
328         for (i = 0; i < cfg->grow_trunk; i++) {
329                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
330                 if (i > 0)
331                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
332         }
333         if (!pool->cfg.max_idx)
334                 pool->cfg.max_idx =
335                         mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
336         if (!cfg->per_core_cache)
337                 pool->free_list = TRUNK_INVALID;
338         rte_spinlock_init(&pool->lcore_lock);
339         return pool;
340 }
341
342 static int
343 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
344 {
345         struct mlx5_indexed_trunk *trunk;
346         struct mlx5_indexed_trunk **trunk_tmp;
347         struct mlx5_indexed_trunk **p;
348         size_t trunk_size = 0;
349         size_t data_size;
350         size_t bmp_size;
351         uint32_t idx, cur_max_idx, i;
352
353         cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
354         if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
355             cur_max_idx >= pool->cfg.max_idx)
356                 return -ENOMEM;
357         if (pool->n_trunk_valid == pool->n_trunk) {
358                 /* No free trunk flags, expand trunk list. */
359                 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
360                              RTE_CACHE_LINE_SIZE / sizeof(void *);
361
362                 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
363                                      sizeof(struct mlx5_indexed_trunk *),
364                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
365                 if (!p)
366                         return -ENOMEM;
367                 if (pool->trunks)
368                         memcpy(p, pool->trunks, pool->n_trunk_valid *
369                                sizeof(struct mlx5_indexed_trunk *));
370                 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
371                        n_grow * sizeof(void *));
372                 trunk_tmp = pool->trunks;
373                 pool->trunks = p;
374                 if (trunk_tmp)
375                         pool->cfg.free(trunk_tmp);
376                 pool->n_trunk += n_grow;
377         }
378         if (!pool->cfg.release_mem_en) {
379                 idx = pool->n_trunk_valid;
380         } else {
381                 /* Find the first available slot in trunk list */
382                 for (idx = 0; idx < pool->n_trunk; idx++)
383                         if (pool->trunks[idx] == NULL)
384                                 break;
385         }
386         trunk_size += sizeof(*trunk);
387         data_size = mlx5_trunk_size_get(pool, idx);
388         bmp_size = rte_bitmap_get_memory_footprint(data_size);
389         /* rte_bitmap requires memory cacheline aligned. */
390         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
391         trunk_size += bmp_size;
392         trunk = pool->cfg.malloc(0, trunk_size,
393                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
394         if (!trunk)
395                 return -ENOMEM;
396         pool->trunks[idx] = trunk;
397         trunk->idx = idx;
398         trunk->free = data_size;
399         trunk->prev = TRUNK_INVALID;
400         trunk->next = TRUNK_INVALID;
401         MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
402         pool->free_list = idx;
403         /* Mark all entries as available. */
404         trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
405                      [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
406                      bmp_size);
407         /* Clear the overhead bits in the trunk if it happens. */
408         if (cur_max_idx + data_size > pool->cfg.max_idx) {
409                 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
410                         rte_bitmap_clear(trunk->bmp, i);
411         }
412         MLX5_ASSERT(trunk->bmp);
413         pool->n_trunk_valid++;
414 #ifdef POOL_DEBUG
415         pool->trunk_new++;
416         pool->trunk_avail++;
417 #endif
418         return 0;
419 }
420
421 static inline struct mlx5_indexed_cache *
422 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
423 {
424         struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
425
426         lc = pool->cache[cidx]->lc;
427         gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
428         if (gc && lc != gc) {
429                 mlx5_ipool_lock(pool);
430                 if (lc && !(--lc->ref_cnt))
431                         olc = lc;
432                 lc = pool->gc;
433                 lc->ref_cnt++;
434                 pool->cache[cidx]->lc = lc;
435                 mlx5_ipool_unlock(pool);
436                 if (olc)
437                         pool->cfg.free(olc);
438         }
439         return lc;
440 }
441
442 static uint32_t
443 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
444 {
445         struct mlx5_indexed_trunk *trunk;
446         struct mlx5_indexed_cache *p, *lc, *olc = NULL;
447         size_t trunk_size = 0;
448         size_t data_size;
449         uint32_t cur_max_idx, trunk_idx, trunk_n;
450         uint32_t fetch_size, ts_idx, i;
451         int n_grow;
452
453 check_again:
454         p = NULL;
455         fetch_size = 0;
456         /*
457          * Fetch new index from global if possible. First round local
458          * cache will be NULL.
459          */
460         lc = pool->cache[cidx]->lc;
461         mlx5_ipool_lock(pool);
462         /* Try to update local cache first. */
463         if (likely(pool->gc)) {
464                 if (lc != pool->gc) {
465                         if (lc && !(--lc->ref_cnt))
466                                 olc = lc;
467                         lc = pool->gc;
468                         lc->ref_cnt++;
469                         pool->cache[cidx]->lc = lc;
470                 }
471                 if (lc->len) {
472                         /* Use the updated local cache to fetch index. */
473                         fetch_size = pool->cfg.per_core_cache >> 2;
474                         if (lc->len < fetch_size)
475                                 fetch_size = lc->len;
476                         lc->len -= fetch_size;
477                         memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
478                                sizeof(uint32_t) * fetch_size);
479                 }
480         }
481         mlx5_ipool_unlock(pool);
482         if (unlikely(olc)) {
483                 pool->cfg.free(olc);
484                 olc = NULL;
485         }
486         if (fetch_size) {
487                 pool->cache[cidx]->len = fetch_size - 1;
488                 return pool->cache[cidx]->idx[pool->cache[cidx]->len];
489         }
490         trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
491                          __ATOMIC_ACQUIRE) : 0;
492         trunk_n = lc ? lc->n_trunk : 0;
493         cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
494         /* Check if index reach maximum. */
495         if (trunk_idx == TRUNK_MAX_IDX ||
496             cur_max_idx >= pool->cfg.max_idx)
497                 return 0;
498         /* No enough space in trunk array, resize the trunks array. */
499         if (trunk_idx == trunk_n) {
500                 n_grow = trunk_idx ? trunk_idx :
501                              RTE_CACHE_LINE_SIZE / sizeof(void *);
502                 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
503                 /* Resize the trunk array. */
504                 p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
505                         sizeof(struct mlx5_indexed_trunk *)) +
506                         (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
507                         RTE_CACHE_LINE_SIZE, rte_socket_id());
508                 if (!p)
509                         return 0;
510                 p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
511                 if (lc)
512                         memcpy(p->trunks, lc->trunks, trunk_idx *
513                        sizeof(struct mlx5_indexed_trunk *));
514 #ifdef RTE_LIBRTE_MLX5_DEBUG
515                 memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
516                         n_grow * sizeof(void *));
517 #endif
518                 p->n_trunk_valid = trunk_idx;
519                 p->n_trunk = trunk_n + n_grow;
520                 p->len = 0;
521         }
522         /* Prepare the new trunk. */
523         trunk_size = sizeof(*trunk);
524         data_size = mlx5_trunk_size_get(pool, trunk_idx);
525         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
526         trunk = pool->cfg.malloc(0, trunk_size,
527                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
528         if (unlikely(!trunk)) {
529                 pool->cfg.free(p);
530                 return 0;
531         }
532         trunk->idx = trunk_idx;
533         trunk->free = data_size;
534         mlx5_ipool_lock(pool);
535         /*
536          * Double check if trunks has been updated or have available index.
537          * During the new trunk allocate, index may still be flushed to the
538          * global cache. So also need to check the pool->gc->len.
539          */
540         if (pool->gc && (lc != pool->gc ||
541             lc->n_trunk_valid != trunk_idx ||
542             pool->gc->len)) {
543                 mlx5_ipool_unlock(pool);
544                 if (p)
545                         pool->cfg.free(p);
546                 pool->cfg.free(trunk);
547                 goto check_again;
548         }
549         /* Resize the trunk array and update local cache first.  */
550         if (p) {
551                 if (lc && !(--lc->ref_cnt))
552                         olc = lc;
553                 lc = p;
554                 lc->ref_cnt = 1;
555                 pool->cache[cidx]->lc = lc;
556                 __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
557         }
558         /* Add trunk to trunks array. */
559         lc->trunks[trunk_idx] = trunk;
560         __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
561         /* Enqueue half of the index to global. */
562         ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
563         fetch_size = trunk->free >> 1;
564         for (i = 0; i < fetch_size; i++)
565                 lc->idx[i] = ts_idx + i;
566         lc->len = fetch_size;
567         mlx5_ipool_unlock(pool);
568         /* Copy left half - 1 to local cache index array. */
569         pool->cache[cidx]->len = trunk->free - fetch_size - 1;
570         ts_idx += fetch_size;
571         for (i = 0; i < pool->cache[cidx]->len; i++)
572                 pool->cache[cidx]->idx[i] = ts_idx + i;
573         if (olc)
574                 pool->cfg.free(olc);
575         return ts_idx + i;
576 }
577
578 static void *
579 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
580 {
581         struct mlx5_indexed_trunk *trunk;
582         struct mlx5_indexed_cache *lc;
583         uint32_t trunk_idx;
584         uint32_t entry_idx;
585
586         MLX5_ASSERT(idx);
587         if (unlikely(!pool->cache[cidx])) {
588                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
589                         sizeof(struct mlx5_ipool_per_lcore) +
590                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
591                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
592                 if (!pool->cache[cidx]) {
593                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
594                         return NULL;
595                 }
596         }
597         lc = mlx5_ipool_update_global_cache(pool, cidx);
598         idx -= 1;
599         trunk_idx = mlx5_trunk_idx_get(pool, idx);
600         trunk = lc->trunks[trunk_idx];
601         MLX5_ASSERT(trunk);
602         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
603         return &trunk->data[entry_idx * pool->cfg.size];
604 }
605
606 static void *
607 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
608 {
609         void *entry;
610         int cidx;
611
612         cidx = rte_lcore_index(rte_lcore_id());
613         if (unlikely(cidx == -1)) {
614                 cidx = RTE_MAX_LCORE;
615                 rte_spinlock_lock(&pool->lcore_lock);
616         }
617         entry = _mlx5_ipool_get_cache(pool, cidx, idx);
618         if (unlikely(cidx == RTE_MAX_LCORE))
619                 rte_spinlock_unlock(&pool->lcore_lock);
620         return entry;
621 }
622
623
624 static void *
625 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
626                          uint32_t *idx)
627 {
628         if (unlikely(!pool->cache[cidx])) {
629                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
630                         sizeof(struct mlx5_ipool_per_lcore) +
631                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
632                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
633                 if (!pool->cache[cidx]) {
634                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
635                         return NULL;
636                 }
637         } else if (pool->cache[cidx]->len) {
638                 pool->cache[cidx]->len--;
639                 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
640                 return _mlx5_ipool_get_cache(pool, cidx, *idx);
641         }
642         /* Not enough idx in global cache. Keep fetching from global. */
643         *idx = mlx5_ipool_allocate_from_global(pool, cidx);
644         if (unlikely(!(*idx)))
645                 return NULL;
646         return _mlx5_ipool_get_cache(pool, cidx, *idx);
647 }
648
649 static void *
650 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
651 {
652         void *entry;
653         int cidx;
654
655         cidx = rte_lcore_index(rte_lcore_id());
656         if (unlikely(cidx == -1)) {
657                 cidx = RTE_MAX_LCORE;
658                 rte_spinlock_lock(&pool->lcore_lock);
659         }
660         entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
661         if (unlikely(cidx == RTE_MAX_LCORE))
662                 rte_spinlock_unlock(&pool->lcore_lock);
663         return entry;
664 }
665
666 static void
667 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
668 {
669         struct mlx5_ipool_per_lcore *ilc;
670         struct mlx5_indexed_cache *gc, *olc = NULL;
671         uint32_t reclaim_num = 0;
672
673         MLX5_ASSERT(idx);
674         /*
675          * When index was allocated on core A but freed on core B. In this
676          * case check if local cache on core B was allocated before.
677          */
678         if (unlikely(!pool->cache[cidx])) {
679                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
680                         sizeof(struct mlx5_ipool_per_lcore) +
681                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
682                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
683                 if (!pool->cache[cidx]) {
684                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
685                         return;
686                 }
687         }
688         /* Try to enqueue to local index cache. */
689         if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
690                 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
691                 pool->cache[cidx]->len++;
692                 return;
693         }
694         ilc = pool->cache[cidx];
695         reclaim_num = pool->cfg.per_core_cache >> 2;
696         ilc->len -= reclaim_num;
697         /* Local index cache full, try with global index cache. */
698         mlx5_ipool_lock(pool);
699         gc = pool->gc;
700         if (ilc->lc != gc) {
701                 if (!(--ilc->lc->ref_cnt))
702                         olc = ilc->lc;
703                 gc->ref_cnt++;
704                 ilc->lc = gc;
705         }
706         memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
707                reclaim_num * sizeof(uint32_t));
708         gc->len += reclaim_num;
709         mlx5_ipool_unlock(pool);
710         if (olc)
711                 pool->cfg.free(olc);
712         pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
713         pool->cache[cidx]->len++;
714 }
715
716 static void
717 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
718 {
719         int cidx;
720
721         cidx = rte_lcore_index(rte_lcore_id());
722         if (unlikely(cidx == -1)) {
723                 cidx = RTE_MAX_LCORE;
724                 rte_spinlock_lock(&pool->lcore_lock);
725         }
726         _mlx5_ipool_free_cache(pool, cidx, idx);
727         if (unlikely(cidx == RTE_MAX_LCORE))
728                 rte_spinlock_unlock(&pool->lcore_lock);
729 }
730
731 void *
732 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
733 {
734         struct mlx5_indexed_trunk *trunk;
735         uint64_t slab = 0;
736         uint32_t iidx = 0;
737         void *p;
738
739         if (pool->cfg.per_core_cache)
740                 return mlx5_ipool_malloc_cache(pool, idx);
741         mlx5_ipool_lock(pool);
742         if (pool->free_list == TRUNK_INVALID) {
743                 /* If no available trunks, grow new. */
744                 if (mlx5_ipool_grow(pool)) {
745                         mlx5_ipool_unlock(pool);
746                         return NULL;
747                 }
748         }
749         MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
750         trunk = pool->trunks[pool->free_list];
751         MLX5_ASSERT(trunk->free);
752         if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
753                 mlx5_ipool_unlock(pool);
754                 return NULL;
755         }
756         MLX5_ASSERT(slab);
757         iidx += __builtin_ctzll(slab);
758         MLX5_ASSERT(iidx != UINT32_MAX);
759         MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
760         rte_bitmap_clear(trunk->bmp, iidx);
761         p = &trunk->data[iidx * pool->cfg.size];
762         /*
763          * The ipool index should grow continually from small to big,
764          * some features as metering only accept limited bits of index.
765          * Random index with MSB set may be rejected.
766          */
767         iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
768         iidx += 1; /* non-zero index. */
769         trunk->free--;
770 #ifdef POOL_DEBUG
771         pool->n_entry++;
772 #endif
773         if (!trunk->free) {
774                 /* Full trunk will be removed from free list in imalloc. */
775                 MLX5_ASSERT(pool->free_list == trunk->idx);
776                 pool->free_list = trunk->next;
777                 if (trunk->next != TRUNK_INVALID)
778                         pool->trunks[trunk->next]->prev = TRUNK_INVALID;
779                 trunk->prev = TRUNK_INVALID;
780                 trunk->next = TRUNK_INVALID;
781 #ifdef POOL_DEBUG
782                 pool->trunk_empty++;
783                 pool->trunk_avail--;
784 #endif
785         }
786         *idx = iidx;
787         mlx5_ipool_unlock(pool);
788         return p;
789 }
790
791 void *
792 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
793 {
794         void *entry = mlx5_ipool_malloc(pool, idx);
795
796         if (entry && pool->cfg.size)
797                 memset(entry, 0, pool->cfg.size);
798         return entry;
799 }
800
801 void
802 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
803 {
804         struct mlx5_indexed_trunk *trunk;
805         uint32_t trunk_idx;
806         uint32_t entry_idx;
807
808         if (!idx)
809                 return;
810         if (pool->cfg.per_core_cache) {
811                 mlx5_ipool_free_cache(pool, idx);
812                 return;
813         }
814         idx -= 1;
815         mlx5_ipool_lock(pool);
816         trunk_idx = mlx5_trunk_idx_get(pool, idx);
817         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
818             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
819                 goto out;
820         trunk = pool->trunks[trunk_idx];
821         if (!trunk)
822                 goto out;
823         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
824         if (trunk_idx != trunk->idx ||
825             rte_bitmap_get(trunk->bmp, entry_idx))
826                 goto out;
827         rte_bitmap_set(trunk->bmp, entry_idx);
828         trunk->free++;
829         if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
830            (pool, trunk->idx)) {
831                 if (pool->free_list == trunk->idx)
832                         pool->free_list = trunk->next;
833                 if (trunk->next != TRUNK_INVALID)
834                         pool->trunks[trunk->next]->prev = trunk->prev;
835                 if (trunk->prev != TRUNK_INVALID)
836                         pool->trunks[trunk->prev]->next = trunk->next;
837                 pool->cfg.free(trunk);
838                 pool->trunks[trunk_idx] = NULL;
839                 pool->n_trunk_valid--;
840 #ifdef POOL_DEBUG
841                 pool->trunk_avail--;
842                 pool->trunk_free++;
843 #endif
844                 if (pool->n_trunk_valid == 0) {
845                         pool->cfg.free(pool->trunks);
846                         pool->trunks = NULL;
847                         pool->n_trunk = 0;
848                 }
849         } else if (trunk->free == 1) {
850                 /* Put into free trunk list head. */
851                 MLX5_ASSERT(pool->free_list != trunk->idx);
852                 trunk->next = pool->free_list;
853                 trunk->prev = TRUNK_INVALID;
854                 if (pool->free_list != TRUNK_INVALID)
855                         pool->trunks[pool->free_list]->prev = trunk->idx;
856                 pool->free_list = trunk->idx;
857 #ifdef POOL_DEBUG
858                 pool->trunk_empty--;
859                 pool->trunk_avail++;
860 #endif
861         }
862 #ifdef POOL_DEBUG
863         pool->n_entry--;
864 #endif
865 out:
866         mlx5_ipool_unlock(pool);
867 }
868
869 void *
870 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
871 {
872         struct mlx5_indexed_trunk *trunk;
873         void *p = NULL;
874         uint32_t trunk_idx;
875         uint32_t entry_idx;
876
877         if (!idx)
878                 return NULL;
879         if (pool->cfg.per_core_cache)
880                 return mlx5_ipool_get_cache(pool, idx);
881         idx -= 1;
882         mlx5_ipool_lock(pool);
883         trunk_idx = mlx5_trunk_idx_get(pool, idx);
884         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
885             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
886                 goto out;
887         trunk = pool->trunks[trunk_idx];
888         if (!trunk)
889                 goto out;
890         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
891         if (trunk_idx != trunk->idx ||
892             rte_bitmap_get(trunk->bmp, entry_idx))
893                 goto out;
894         p = &trunk->data[entry_idx * pool->cfg.size];
895 out:
896         mlx5_ipool_unlock(pool);
897         return p;
898 }
899
900 int
901 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
902 {
903         struct mlx5_indexed_trunk **trunks = NULL;
904         struct mlx5_indexed_cache *gc = pool->gc;
905         uint32_t i, n_trunk_valid = 0;
906
907         MLX5_ASSERT(pool);
908         mlx5_ipool_lock(pool);
909         if (pool->cfg.per_core_cache) {
910                 for (i = 0; i <= RTE_MAX_LCORE; i++) {
911                         /*
912                          * Free only old global cache. Pool gc will be
913                          * freed at last.
914                          */
915                         if (pool->cache[i]) {
916                                 if (pool->cache[i]->lc &&
917                                     pool->cache[i]->lc != pool->gc &&
918                                     (!(--pool->cache[i]->lc->ref_cnt)))
919                                         pool->cfg.free(pool->cache[i]->lc);
920                                 pool->cfg.free(pool->cache[i]);
921                         }
922                 }
923                 if (gc) {
924                         trunks = gc->trunks;
925                         n_trunk_valid = gc->n_trunk_valid;
926                 }
927         } else {
928                 gc = NULL;
929                 trunks = pool->trunks;
930                 n_trunk_valid = pool->n_trunk_valid;
931         }
932         for (i = 0; i < n_trunk_valid; i++) {
933                 if (trunks[i])
934                         pool->cfg.free(trunks[i]);
935         }
936         if (!gc && trunks)
937                 pool->cfg.free(trunks);
938         if (gc)
939                 pool->cfg.free(gc);
940         mlx5_ipool_unlock(pool);
941         mlx5_free(pool);
942         return 0;
943 }
944
945 void
946 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
947 {
948         uint32_t i, j;
949         struct mlx5_indexed_cache *gc;
950         struct rte_bitmap *ibmp;
951         uint32_t bmp_num, mem_size;
952
953         if (!pool->cfg.per_core_cache)
954                 return;
955         gc = pool->gc;
956         if (!gc)
957                 return;
958         /* Reset bmp. */
959         bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
960         mem_size = rte_bitmap_get_memory_footprint(bmp_num);
961         pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
962                                          RTE_CACHE_LINE_SIZE, rte_socket_id());
963         if (!pool->bmp_mem) {
964                 DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
965                 return;
966         }
967         ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
968         if (!ibmp) {
969                 pool->cfg.free(pool->bmp_mem);
970                 pool->bmp_mem = NULL;
971                 DRV_LOG(ERR, "Ipool bitmap create failed.\n");
972                 return;
973         }
974         pool->ibmp = ibmp;
975         /* Clear global cache. */
976         for (i = 0; i < gc->len; i++)
977                 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
978         /* Clear core cache. */
979         for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
980                 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
981
982                 if (!ilc)
983                         continue;
984                 for (j = 0; j < ilc->len; j++)
985                         rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
986         }
987 }
988
989 static void *
990 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
991 {
992         struct rte_bitmap *ibmp;
993         uint64_t slab = 0;
994         uint32_t iidx = *pos;
995
996         ibmp = pool->ibmp;
997         if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
998                 if (pool->bmp_mem) {
999                         pool->cfg.free(pool->bmp_mem);
1000                         pool->bmp_mem = NULL;
1001                         pool->ibmp = NULL;
1002                 }
1003                 return NULL;
1004         }
1005         iidx += __builtin_ctzll(slab);
1006         rte_bitmap_clear(ibmp, iidx);
1007         iidx++;
1008         *pos = iidx;
1009         return mlx5_ipool_get_cache(pool, iidx);
1010 }
1011
1012 void *
1013 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
1014 {
1015         uint32_t idx = *pos;
1016         void *entry;
1017
1018         if (pool->cfg.per_core_cache)
1019                 return mlx5_ipool_get_next_cache(pool, pos);
1020         while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
1021                 entry = mlx5_ipool_get(pool, idx);
1022                 if (entry) {
1023                         *pos = idx;
1024                         return entry;
1025                 }
1026                 idx++;
1027         }
1028         return NULL;
1029 }
1030
1031 void
1032 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
1033 {
1034         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
1035                "total: %d\n",
1036                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
1037                pool->cfg.trunk_size, pool->n_trunk_valid);
1038 #ifdef POOL_DEBUG
1039         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
1040                "available %u free %u\n",
1041                pool->cfg.type, pool->n_entry, pool->trunk_new,
1042                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
1043 #endif
1044 }
1045
1046 struct mlx5_l3t_tbl *
1047 mlx5_l3t_create(enum mlx5_l3t_type type)
1048 {
1049         struct mlx5_l3t_tbl *tbl;
1050         struct mlx5_indexed_pool_config l3t_ip_cfg = {
1051                 .trunk_size = 16,
1052                 .grow_trunk = 6,
1053                 .grow_shift = 1,
1054                 .need_lock = 0,
1055                 .release_mem_en = 1,
1056                 .malloc = mlx5_malloc,
1057                 .free = mlx5_free,
1058         };
1059
1060         if (type >= MLX5_L3T_TYPE_MAX) {
1061                 rte_errno = EINVAL;
1062                 return NULL;
1063         }
1064         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
1065                           SOCKET_ID_ANY);
1066         if (!tbl) {
1067                 rte_errno = ENOMEM;
1068                 return NULL;
1069         }
1070         tbl->type = type;
1071         switch (type) {
1072         case MLX5_L3T_TYPE_WORD:
1073                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
1074                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
1075                 break;
1076         case MLX5_L3T_TYPE_DWORD:
1077                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
1078                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
1079                 break;
1080         case MLX5_L3T_TYPE_QWORD:
1081                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
1082                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
1083                 break;
1084         default:
1085                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
1086                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
1087                 break;
1088         }
1089         rte_spinlock_init(&tbl->sl);
1090         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
1091         if (!tbl->eip) {
1092                 rte_errno = ENOMEM;
1093                 mlx5_free(tbl);
1094                 tbl = NULL;
1095         }
1096         return tbl;
1097 }
1098
1099 void
1100 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1101 {
1102         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1103         uint32_t i, j;
1104
1105         if (!tbl)
1106                 return;
1107         g_tbl = tbl->tbl;
1108         if (g_tbl) {
1109                 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1110                         m_tbl = g_tbl->tbl[i];
1111                         if (!m_tbl)
1112                                 continue;
1113                         for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1114                                 if (!m_tbl->tbl[j])
1115                                         continue;
1116                                 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1117                                             m_tbl->tbl[j])->ref_cnt);
1118                                 mlx5_ipool_free(tbl->eip,
1119                                                 ((struct mlx5_l3t_entry_word *)
1120                                                 m_tbl->tbl[j])->idx);
1121                                 m_tbl->tbl[j] = 0;
1122                                 if (!(--m_tbl->ref_cnt))
1123                                         break;
1124                         }
1125                         MLX5_ASSERT(!m_tbl->ref_cnt);
1126                         mlx5_free(g_tbl->tbl[i]);
1127                         g_tbl->tbl[i] = 0;
1128                         if (!(--g_tbl->ref_cnt))
1129                                 break;
1130                 }
1131                 MLX5_ASSERT(!g_tbl->ref_cnt);
1132                 mlx5_free(tbl->tbl);
1133                 tbl->tbl = 0;
1134         }
1135         mlx5_ipool_destroy(tbl->eip);
1136         mlx5_free(tbl);
1137 }
1138
1139 static int32_t
1140 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1141                 union mlx5_l3t_data *data)
1142 {
1143         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1144         struct mlx5_l3t_entry_word *w_e_tbl;
1145         struct mlx5_l3t_entry_dword *dw_e_tbl;
1146         struct mlx5_l3t_entry_qword *qw_e_tbl;
1147         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1148         void *e_tbl;
1149         uint32_t entry_idx;
1150
1151         g_tbl = tbl->tbl;
1152         if (!g_tbl)
1153                 return -1;
1154         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1155         if (!m_tbl)
1156                 return -1;
1157         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1158         if (!e_tbl)
1159                 return -1;
1160         entry_idx = idx & MLX5_L3T_ET_MASK;
1161         switch (tbl->type) {
1162         case MLX5_L3T_TYPE_WORD:
1163                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1164                 data->word = w_e_tbl->entry[entry_idx].data;
1165                 if (w_e_tbl->entry[entry_idx].data)
1166                         w_e_tbl->entry[entry_idx].ref_cnt++;
1167                 break;
1168         case MLX5_L3T_TYPE_DWORD:
1169                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1170                 data->dword = dw_e_tbl->entry[entry_idx].data;
1171                 if (dw_e_tbl->entry[entry_idx].data)
1172                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1173                 break;
1174         case MLX5_L3T_TYPE_QWORD:
1175                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1176                 data->qword = qw_e_tbl->entry[entry_idx].data;
1177                 if (qw_e_tbl->entry[entry_idx].data)
1178                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1179                 break;
1180         default:
1181                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1182                 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1183                 if (ptr_e_tbl->entry[entry_idx].data)
1184                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1185                 break;
1186         }
1187         return 0;
1188 }
1189
1190 int32_t
1191 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1192                    union mlx5_l3t_data *data)
1193 {
1194         int ret;
1195
1196         rte_spinlock_lock(&tbl->sl);
1197         ret = __l3t_get_entry(tbl, idx, data);
1198         rte_spinlock_unlock(&tbl->sl);
1199         return ret;
1200 }
1201
1202 int32_t
1203 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1204 {
1205         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1206         struct mlx5_l3t_entry_word *w_e_tbl;
1207         struct mlx5_l3t_entry_dword *dw_e_tbl;
1208         struct mlx5_l3t_entry_qword *qw_e_tbl;
1209         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1210         void *e_tbl;
1211         uint32_t entry_idx;
1212         uint64_t ref_cnt;
1213         int32_t ret = -1;
1214
1215         rte_spinlock_lock(&tbl->sl);
1216         g_tbl = tbl->tbl;
1217         if (!g_tbl)
1218                 goto out;
1219         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1220         if (!m_tbl)
1221                 goto out;
1222         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1223         if (!e_tbl)
1224                 goto out;
1225         entry_idx = idx & MLX5_L3T_ET_MASK;
1226         switch (tbl->type) {
1227         case MLX5_L3T_TYPE_WORD:
1228                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1229                 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1230                 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1231                 if (ret)
1232                         goto out;
1233                 w_e_tbl->entry[entry_idx].data = 0;
1234                 ref_cnt = --w_e_tbl->ref_cnt;
1235                 break;
1236         case MLX5_L3T_TYPE_DWORD:
1237                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1238                 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1239                 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1240                 if (ret)
1241                         goto out;
1242                 dw_e_tbl->entry[entry_idx].data = 0;
1243                 ref_cnt = --dw_e_tbl->ref_cnt;
1244                 break;
1245         case MLX5_L3T_TYPE_QWORD:
1246                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1247                 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1248                 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1249                 if (ret)
1250                         goto out;
1251                 qw_e_tbl->entry[entry_idx].data = 0;
1252                 ref_cnt = --qw_e_tbl->ref_cnt;
1253                 break;
1254         default:
1255                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1256                 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1257                 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1258                 if (ret)
1259                         goto out;
1260                 ptr_e_tbl->entry[entry_idx].data = NULL;
1261                 ref_cnt = --ptr_e_tbl->ref_cnt;
1262                 break;
1263         }
1264         if (!ref_cnt) {
1265                 mlx5_ipool_free(tbl->eip,
1266                                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1267                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1268                                                                         NULL;
1269                 if (!(--m_tbl->ref_cnt)) {
1270                         mlx5_free(m_tbl);
1271                         g_tbl->tbl
1272                         [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1273                         if (!(--g_tbl->ref_cnt)) {
1274                                 mlx5_free(g_tbl);
1275                                 tbl->tbl = 0;
1276                         }
1277                 }
1278         }
1279 out:
1280         rte_spinlock_unlock(&tbl->sl);
1281         return ret;
1282 }
1283
1284 static int32_t
1285 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1286                 union mlx5_l3t_data *data)
1287 {
1288         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1289         struct mlx5_l3t_entry_word *w_e_tbl;
1290         struct mlx5_l3t_entry_dword *dw_e_tbl;
1291         struct mlx5_l3t_entry_qword *qw_e_tbl;
1292         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1293         void *e_tbl;
1294         uint32_t entry_idx, tbl_idx = 0;
1295
1296         /* Check the global table, create it if empty. */
1297         g_tbl = tbl->tbl;
1298         if (!g_tbl) {
1299                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1300                                     sizeof(struct mlx5_l3t_level_tbl) +
1301                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1302                                     SOCKET_ID_ANY);
1303                 if (!g_tbl) {
1304                         rte_errno = ENOMEM;
1305                         return -1;
1306                 }
1307                 tbl->tbl = g_tbl;
1308         }
1309         /*
1310          * Check the middle table, create it if empty. Ref_cnt will be
1311          * increased if new sub table created.
1312          */
1313         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1314         if (!m_tbl) {
1315                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1316                                     sizeof(struct mlx5_l3t_level_tbl) +
1317                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1318                                     SOCKET_ID_ANY);
1319                 if (!m_tbl) {
1320                         rte_errno = ENOMEM;
1321                         return -1;
1322                 }
1323                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1324                                                                         m_tbl;
1325                 g_tbl->ref_cnt++;
1326         }
1327         /*
1328          * Check the entry table, create it if empty. Ref_cnt will be
1329          * increased if new sub entry table created.
1330          */
1331         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1332         if (!e_tbl) {
1333                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1334                 if (!e_tbl) {
1335                         rte_errno = ENOMEM;
1336                         return -1;
1337                 }
1338                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1339                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1340                                                                         e_tbl;
1341                 m_tbl->ref_cnt++;
1342         }
1343         entry_idx = idx & MLX5_L3T_ET_MASK;
1344         switch (tbl->type) {
1345         case MLX5_L3T_TYPE_WORD:
1346                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1347                 if (w_e_tbl->entry[entry_idx].data) {
1348                         data->word = w_e_tbl->entry[entry_idx].data;
1349                         w_e_tbl->entry[entry_idx].ref_cnt++;
1350                         rte_errno = EEXIST;
1351                         return -1;
1352                 }
1353                 w_e_tbl->entry[entry_idx].data = data->word;
1354                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1355                 w_e_tbl->ref_cnt++;
1356                 break;
1357         case MLX5_L3T_TYPE_DWORD:
1358                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1359                 if (dw_e_tbl->entry[entry_idx].data) {
1360                         data->dword = dw_e_tbl->entry[entry_idx].data;
1361                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1362                         rte_errno = EEXIST;
1363                         return -1;
1364                 }
1365                 dw_e_tbl->entry[entry_idx].data = data->dword;
1366                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1367                 dw_e_tbl->ref_cnt++;
1368                 break;
1369         case MLX5_L3T_TYPE_QWORD:
1370                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1371                 if (qw_e_tbl->entry[entry_idx].data) {
1372                         data->qword = qw_e_tbl->entry[entry_idx].data;
1373                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1374                         rte_errno = EEXIST;
1375                         return -1;
1376                 }
1377                 qw_e_tbl->entry[entry_idx].data = data->qword;
1378                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1379                 qw_e_tbl->ref_cnt++;
1380                 break;
1381         default:
1382                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1383                 if (ptr_e_tbl->entry[entry_idx].data) {
1384                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
1385                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1386                         rte_errno = EEXIST;
1387                         return -1;
1388                 }
1389                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1390                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1391                 ptr_e_tbl->ref_cnt++;
1392                 break;
1393         }
1394         return 0;
1395 }
1396
1397 int32_t
1398 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1399                    union mlx5_l3t_data *data)
1400 {
1401         int ret;
1402
1403         rte_spinlock_lock(&tbl->sl);
1404         ret = __l3t_set_entry(tbl, idx, data);
1405         rte_spinlock_unlock(&tbl->sl);
1406         return ret;
1407 }
1408
1409 int32_t
1410 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1411                        union mlx5_l3t_data *data,
1412                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
1413 {
1414         int32_t ret;
1415
1416         rte_spinlock_lock(&tbl->sl);
1417         /* Check if entry data is ready. */
1418         ret = __l3t_get_entry(tbl, idx, data);
1419         if (!ret) {
1420                 switch (tbl->type) {
1421                 case MLX5_L3T_TYPE_WORD:
1422                         if (data->word)
1423                                 goto out;
1424                         break;
1425                 case MLX5_L3T_TYPE_DWORD:
1426                         if (data->dword)
1427                                 goto out;
1428                         break;
1429                 case MLX5_L3T_TYPE_QWORD:
1430                         if (data->qword)
1431                                 goto out;
1432                         break;
1433                 default:
1434                         if (data->ptr)
1435                                 goto out;
1436                         break;
1437                 }
1438         }
1439         /* Entry data is not ready, use user callback to create it. */
1440         ret = cb(ctx, data);
1441         if (ret)
1442                 goto out;
1443         /* Save the new allocated data to entry. */
1444         ret = __l3t_set_entry(tbl, idx, data);
1445 out:
1446         rte_spinlock_unlock(&tbl->sl);
1447         return ret;
1448 }