f505caed4e31d6379a770a5d2844c92336a53c71
[dpdk.git] / drivers / net / mlx5 / mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6
7 #include <mlx5_malloc.h>
8
9 #include "mlx5_utils.h"
10
11
12 /********************* mlx5 list ************************/
13
14 int
15 mlx5_list_create(struct mlx5_list *list, const char *name, void *ctx,
16                  mlx5_list_create_cb cb_create,
17                  mlx5_list_match_cb cb_match,
18                  mlx5_list_remove_cb cb_remove,
19                  mlx5_list_clone_cb cb_clone,
20                  mlx5_list_clone_free_cb cb_clone_free)
21 {
22         int i;
23
24         MLX5_ASSERT(list);
25         if (!cb_match || !cb_create || !cb_remove || !cb_clone ||
26             !cb_clone_free)
27                 return -1;
28         if (name)
29                 snprintf(list->name, sizeof(list->name), "%s", name);
30         list->ctx = ctx;
31         list->cb_create = cb_create;
32         list->cb_match = cb_match;
33         list->cb_remove = cb_remove;
34         list->cb_clone = cb_clone;
35         list->cb_clone_free = cb_clone_free;
36         rte_rwlock_init(&list->lock);
37         DRV_LOG(DEBUG, "mlx5 list %s initialized.", list->name);
38         for (i = 0; i <= RTE_MAX_LCORE; i++)
39                 LIST_INIT(&list->cache[i].h);
40         return 0;
41 }
42
43 static struct mlx5_list_entry *
44 __list_lookup(struct mlx5_list *list, int lcore_index, void *ctx, bool reuse)
45 {
46         struct mlx5_list_entry *entry = LIST_FIRST(&list->cache[lcore_index].h);
47         uint32_t ret;
48
49         while (entry != NULL) {
50                 struct mlx5_list_entry *nentry = LIST_NEXT(entry, next);
51
52                 if (list->cb_match(list, entry, ctx)) {
53                         if (lcore_index < RTE_MAX_LCORE) {
54                                 ret = __atomic_load_n(&entry->ref_cnt,
55                                                       __ATOMIC_ACQUIRE);
56                                 if (ret == 0) {
57                                         LIST_REMOVE(entry, next);
58                                         list->cb_clone_free(list, entry);
59                                 }
60                         }
61                         entry = nentry;
62                         continue;
63                 }
64                 if (reuse) {
65                         ret = __atomic_add_fetch(&entry->ref_cnt, 1,
66                                                  __ATOMIC_ACQUIRE);
67                         if (ret == 1u) {
68                                 /* Entry was invalid before, free it. */
69                                 LIST_REMOVE(entry, next);
70                                 list->cb_clone_free(list, entry);
71                                 entry = nentry;
72                                 continue;
73                         }
74                         DRV_LOG(DEBUG, "mlx5 list %s entry %p ref++: %u.",
75                                 list->name, (void *)entry, entry->ref_cnt);
76                 }
77                 break;
78         }
79         return entry;
80 }
81
82 struct mlx5_list_entry *
83 mlx5_list_lookup(struct mlx5_list *list, void *ctx)
84 {
85         struct mlx5_list_entry *entry = NULL;
86         int i;
87
88         rte_rwlock_read_lock(&list->lock);
89         for (i = 0; i < RTE_MAX_LCORE; i++) {
90                 entry = __list_lookup(list, i, ctx, false);
91                 if (entry)
92                         break;
93         }
94         rte_rwlock_read_unlock(&list->lock);
95         return entry;
96 }
97
98 static struct mlx5_list_entry *
99 mlx5_list_cache_insert(struct mlx5_list *list, int lcore_index,
100                        struct mlx5_list_entry *gentry, void *ctx)
101 {
102         struct mlx5_list_entry *lentry = list->cb_clone(list, gentry, ctx);
103
104         if (!lentry)
105                 return NULL;
106         lentry->ref_cnt = 1u;
107         lentry->gentry = gentry;
108         LIST_INSERT_HEAD(&list->cache[lcore_index].h, lentry, next);
109         return lentry;
110 }
111
112 struct mlx5_list_entry *
113 mlx5_list_register(struct mlx5_list *list, void *ctx)
114 {
115         struct mlx5_list_entry *entry, *lentry;
116         uint32_t prev_gen_cnt = 0;
117         int lcore_index = rte_lcore_index(rte_lcore_id());
118
119         MLX5_ASSERT(list);
120         MLX5_ASSERT(lcore_index < RTE_MAX_LCORE);
121         if (unlikely(lcore_index == -1)) {
122                 rte_errno = ENOTSUP;
123                 return NULL;
124         }
125         /* Lookup in local cache. */
126         lentry = __list_lookup(list, lcore_index, ctx, true);
127         if (lentry)
128                 return lentry;
129         /* Lookup with read lock, reuse if found. */
130         rte_rwlock_read_lock(&list->lock);
131         entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
132         if (entry == NULL) {
133                 prev_gen_cnt = __atomic_load_n(&list->gen_cnt,
134                                                __ATOMIC_ACQUIRE);
135                 rte_rwlock_read_unlock(&list->lock);
136         } else {
137                 rte_rwlock_read_unlock(&list->lock);
138                 return mlx5_list_cache_insert(list, lcore_index, entry, ctx);
139         }
140         /* Not found, append with write lock - block read from other threads. */
141         rte_rwlock_write_lock(&list->lock);
142         /* If list changed by other threads before lock, search again. */
143         if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
144                 /* Lookup and reuse w/o read lock. */
145                 entry = __list_lookup(list, RTE_MAX_LCORE, ctx, true);
146                 if (entry) {
147                         rte_rwlock_write_unlock(&list->lock);
148                         return mlx5_list_cache_insert(list, lcore_index, entry,
149                                                       ctx);
150                 }
151         }
152         entry = list->cb_create(list, entry, ctx);
153         if (entry) {
154                 lentry = mlx5_list_cache_insert(list, lcore_index, entry, ctx);
155                 if (!lentry) {
156                         list->cb_remove(list, entry);
157                 } else {
158                         entry->ref_cnt = 1u;
159                         LIST_INSERT_HEAD(&list->cache[RTE_MAX_LCORE].h, entry,
160                                          next);
161                         __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
162                         __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
163                         DRV_LOG(DEBUG, "mlx5 list %s entry %p new: %u.",
164                                 list->name, (void *)entry, entry->ref_cnt);
165                 }
166
167         }
168         rte_rwlock_write_unlock(&list->lock);
169         return lentry;
170 }
171
172 int
173 mlx5_list_unregister(struct mlx5_list *list,
174                       struct mlx5_list_entry *entry)
175 {
176         struct mlx5_list_entry *gentry = entry->gentry;
177
178         if (__atomic_sub_fetch(&entry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
179                 return 1;
180         if (__atomic_sub_fetch(&gentry->ref_cnt, 1, __ATOMIC_ACQUIRE) != 0)
181                 return 1;
182         rte_rwlock_write_lock(&list->lock);
183         if (__atomic_load_n(&gentry->ref_cnt, __ATOMIC_ACQUIRE) == 0) {
184                 __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
185                 __atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
186                 LIST_REMOVE(gentry, next);
187                 list->cb_remove(list, gentry);
188                 rte_rwlock_write_unlock(&list->lock);
189                 DRV_LOG(DEBUG, "mlx5 list %s entry %p removed.",
190                         list->name, (void *)gentry);
191                 return 0;
192         }
193         rte_rwlock_write_unlock(&list->lock);
194         return 1;
195 }
196
197 void
198 mlx5_list_destroy(struct mlx5_list *list)
199 {
200         struct mlx5_list_entry *entry;
201         int i;
202
203         MLX5_ASSERT(list);
204         for (i = 0; i <= RTE_MAX_LCORE; i++) {
205                 while (!LIST_EMPTY(&list->cache[i].h)) {
206                         entry = LIST_FIRST(&list->cache[i].h);
207                         LIST_REMOVE(entry, next);
208                         if (i == RTE_MAX_LCORE) {
209                                 list->cb_remove(list, entry);
210                                 DRV_LOG(DEBUG, "mlx5 list %s entry %p "
211                                         "destroyed.", list->name,
212                                         (void *)entry);
213                         } else {
214                                 list->cb_clone_free(list, entry);
215                         }
216                 }
217         }
218         memset(list, 0, sizeof(*list));
219 }
220
221 uint32_t
222 mlx5_list_get_entry_num(struct mlx5_list *list)
223 {
224         MLX5_ASSERT(list);
225         return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
226 }
227
228 /********************* Indexed pool **********************/
229
230 static inline void
231 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
232 {
233         if (pool->cfg.need_lock)
234                 rte_spinlock_lock(&pool->rsz_lock);
235 }
236
237 static inline void
238 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
239 {
240         if (pool->cfg.need_lock)
241                 rte_spinlock_unlock(&pool->rsz_lock);
242 }
243
244 static inline uint32_t
245 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
246 {
247         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
248         uint32_t trunk_idx = 0;
249         uint32_t i;
250
251         if (!cfg->grow_trunk)
252                 return entry_idx / cfg->trunk_size;
253         if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
254                 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
255                             (cfg->trunk_size << (cfg->grow_shift *
256                             cfg->grow_trunk)) + cfg->grow_trunk;
257         } else {
258                 for (i = 0; i < cfg->grow_trunk; i++) {
259                         if (entry_idx < pool->grow_tbl[i])
260                                 break;
261                 }
262                 trunk_idx = i;
263         }
264         return trunk_idx;
265 }
266
267 static inline uint32_t
268 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
269 {
270         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
271
272         return cfg->trunk_size << (cfg->grow_shift *
273                (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
274 }
275
276 static inline uint32_t
277 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
278 {
279         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
280         uint32_t offset = 0;
281
282         if (!trunk_idx)
283                 return 0;
284         if (!cfg->grow_trunk)
285                 return cfg->trunk_size * trunk_idx;
286         if (trunk_idx < cfg->grow_trunk)
287                 offset = pool->grow_tbl[trunk_idx - 1];
288         else
289                 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
290                          (cfg->trunk_size << (cfg->grow_shift *
291                          cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
292         return offset;
293 }
294
295 struct mlx5_indexed_pool *
296 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
297 {
298         struct mlx5_indexed_pool *pool;
299         uint32_t i;
300
301         if (!cfg || (!cfg->malloc ^ !cfg->free) ||
302             (cfg->per_core_cache && cfg->release_mem_en) ||
303             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
304             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
305                 return NULL;
306         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
307                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
308                            SOCKET_ID_ANY);
309         if (!pool)
310                 return NULL;
311         pool->cfg = *cfg;
312         if (!pool->cfg.trunk_size)
313                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
314         if (!cfg->malloc && !cfg->free) {
315                 pool->cfg.malloc = mlx5_malloc;
316                 pool->cfg.free = mlx5_free;
317         }
318         if (pool->cfg.need_lock)
319                 rte_spinlock_init(&pool->rsz_lock);
320         /*
321          * Initialize the dynamic grow trunk size lookup table to have a quick
322          * lookup for the trunk entry index offset.
323          */
324         for (i = 0; i < cfg->grow_trunk; i++) {
325                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
326                 if (i > 0)
327                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
328         }
329         if (!pool->cfg.max_idx)
330                 pool->cfg.max_idx =
331                         mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
332         if (!cfg->per_core_cache)
333                 pool->free_list = TRUNK_INVALID;
334         rte_spinlock_init(&pool->lcore_lock);
335         return pool;
336 }
337
338 static int
339 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
340 {
341         struct mlx5_indexed_trunk *trunk;
342         struct mlx5_indexed_trunk **trunk_tmp;
343         struct mlx5_indexed_trunk **p;
344         size_t trunk_size = 0;
345         size_t data_size;
346         size_t bmp_size;
347         uint32_t idx, cur_max_idx, i;
348
349         cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
350         if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
351             cur_max_idx >= pool->cfg.max_idx)
352                 return -ENOMEM;
353         if (pool->n_trunk_valid == pool->n_trunk) {
354                 /* No free trunk flags, expand trunk list. */
355                 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
356                              RTE_CACHE_LINE_SIZE / sizeof(void *);
357
358                 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
359                                      sizeof(struct mlx5_indexed_trunk *),
360                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
361                 if (!p)
362                         return -ENOMEM;
363                 if (pool->trunks)
364                         memcpy(p, pool->trunks, pool->n_trunk_valid *
365                                sizeof(struct mlx5_indexed_trunk *));
366                 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
367                        n_grow * sizeof(void *));
368                 trunk_tmp = pool->trunks;
369                 pool->trunks = p;
370                 if (trunk_tmp)
371                         pool->cfg.free(trunk_tmp);
372                 pool->n_trunk += n_grow;
373         }
374         if (!pool->cfg.release_mem_en) {
375                 idx = pool->n_trunk_valid;
376         } else {
377                 /* Find the first available slot in trunk list */
378                 for (idx = 0; idx < pool->n_trunk; idx++)
379                         if (pool->trunks[idx] == NULL)
380                                 break;
381         }
382         trunk_size += sizeof(*trunk);
383         data_size = mlx5_trunk_size_get(pool, idx);
384         bmp_size = rte_bitmap_get_memory_footprint(data_size);
385         /* rte_bitmap requires memory cacheline aligned. */
386         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
387         trunk_size += bmp_size;
388         trunk = pool->cfg.malloc(0, trunk_size,
389                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
390         if (!trunk)
391                 return -ENOMEM;
392         pool->trunks[idx] = trunk;
393         trunk->idx = idx;
394         trunk->free = data_size;
395         trunk->prev = TRUNK_INVALID;
396         trunk->next = TRUNK_INVALID;
397         MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
398         pool->free_list = idx;
399         /* Mark all entries as available. */
400         trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
401                      [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
402                      bmp_size);
403         /* Clear the overhead bits in the trunk if it happens. */
404         if (cur_max_idx + data_size > pool->cfg.max_idx) {
405                 for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
406                         rte_bitmap_clear(trunk->bmp, i);
407         }
408         MLX5_ASSERT(trunk->bmp);
409         pool->n_trunk_valid++;
410 #ifdef POOL_DEBUG
411         pool->trunk_new++;
412         pool->trunk_avail++;
413 #endif
414         return 0;
415 }
416
417 static inline struct mlx5_indexed_cache *
418 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
419 {
420         struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
421
422         lc = pool->cache[cidx]->lc;
423         gc = __atomic_load_n(&pool->gc, __ATOMIC_RELAXED);
424         if (gc && lc != gc) {
425                 mlx5_ipool_lock(pool);
426                 if (lc && !(--lc->ref_cnt))
427                         olc = lc;
428                 lc = pool->gc;
429                 lc->ref_cnt++;
430                 pool->cache[cidx]->lc = lc;
431                 mlx5_ipool_unlock(pool);
432                 if (olc)
433                         pool->cfg.free(olc);
434         }
435         return lc;
436 }
437
438 static uint32_t
439 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
440 {
441         struct mlx5_indexed_trunk *trunk;
442         struct mlx5_indexed_cache *p, *lc, *olc = NULL;
443         size_t trunk_size = 0;
444         size_t data_size;
445         uint32_t cur_max_idx, trunk_idx, trunk_n;
446         uint32_t fetch_size, ts_idx, i;
447         int n_grow;
448
449 check_again:
450         p = NULL;
451         fetch_size = 0;
452         /*
453          * Fetch new index from global if possible. First round local
454          * cache will be NULL.
455          */
456         lc = pool->cache[cidx]->lc;
457         mlx5_ipool_lock(pool);
458         /* Try to update local cache first. */
459         if (likely(pool->gc)) {
460                 if (lc != pool->gc) {
461                         if (lc && !(--lc->ref_cnt))
462                                 olc = lc;
463                         lc = pool->gc;
464                         lc->ref_cnt++;
465                         pool->cache[cidx]->lc = lc;
466                 }
467                 if (lc->len) {
468                         /* Use the updated local cache to fetch index. */
469                         fetch_size = pool->cfg.per_core_cache >> 2;
470                         if (lc->len < fetch_size)
471                                 fetch_size = lc->len;
472                         lc->len -= fetch_size;
473                         memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
474                                sizeof(uint32_t) * fetch_size);
475                 }
476         }
477         mlx5_ipool_unlock(pool);
478         if (unlikely(olc)) {
479                 pool->cfg.free(olc);
480                 olc = NULL;
481         }
482         if (fetch_size) {
483                 pool->cache[cidx]->len = fetch_size - 1;
484                 return pool->cache[cidx]->idx[pool->cache[cidx]->len];
485         }
486         trunk_idx = lc ? __atomic_load_n(&lc->n_trunk_valid,
487                          __ATOMIC_ACQUIRE) : 0;
488         trunk_n = lc ? lc->n_trunk : 0;
489         cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
490         /* Check if index reach maximum. */
491         if (trunk_idx == TRUNK_MAX_IDX ||
492             cur_max_idx >= pool->cfg.max_idx)
493                 return 0;
494         /* No enough space in trunk array, resize the trunks array. */
495         if (trunk_idx == trunk_n) {
496                 n_grow = trunk_idx ? trunk_idx :
497                              RTE_CACHE_LINE_SIZE / sizeof(void *);
498                 cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
499                 /* Resize the trunk array. */
500                 p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
501                         sizeof(struct mlx5_indexed_trunk *)) +
502                         (cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
503                         RTE_CACHE_LINE_SIZE, rte_socket_id());
504                 if (!p)
505                         return 0;
506                 p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
507                 if (lc)
508                         memcpy(p->trunks, lc->trunks, trunk_idx *
509                        sizeof(struct mlx5_indexed_trunk *));
510 #ifdef RTE_LIBRTE_MLX5_DEBUG
511                 memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
512                         n_grow * sizeof(void *));
513 #endif
514                 p->n_trunk_valid = trunk_idx;
515                 p->n_trunk = trunk_n + n_grow;
516                 p->len = 0;
517         }
518         /* Prepare the new trunk. */
519         trunk_size = sizeof(*trunk);
520         data_size = mlx5_trunk_size_get(pool, trunk_idx);
521         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
522         trunk = pool->cfg.malloc(0, trunk_size,
523                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
524         if (unlikely(!trunk)) {
525                 pool->cfg.free(p);
526                 return 0;
527         }
528         trunk->idx = trunk_idx;
529         trunk->free = data_size;
530         mlx5_ipool_lock(pool);
531         /*
532          * Double check if trunks has been updated or have available index.
533          * During the new trunk allocate, index may still be flushed to the
534          * global cache. So also need to check the pool->gc->len.
535          */
536         if (pool->gc && (lc != pool->gc ||
537             lc->n_trunk_valid != trunk_idx ||
538             pool->gc->len)) {
539                 mlx5_ipool_unlock(pool);
540                 if (p)
541                         pool->cfg.free(p);
542                 pool->cfg.free(trunk);
543                 goto check_again;
544         }
545         /* Resize the trunk array and update local cache first.  */
546         if (p) {
547                 if (lc && !(--lc->ref_cnt))
548                         olc = lc;
549                 lc = p;
550                 lc->ref_cnt = 1;
551                 pool->cache[cidx]->lc = lc;
552                 __atomic_store_n(&pool->gc, p, __ATOMIC_RELAXED);
553         }
554         /* Add trunk to trunks array. */
555         lc->trunks[trunk_idx] = trunk;
556         __atomic_fetch_add(&lc->n_trunk_valid, 1, __ATOMIC_RELAXED);
557         /* Enqueue half of the index to global. */
558         ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
559         fetch_size = trunk->free >> 1;
560         for (i = 0; i < fetch_size; i++)
561                 lc->idx[i] = ts_idx + i;
562         lc->len = fetch_size;
563         mlx5_ipool_unlock(pool);
564         /* Copy left half - 1 to local cache index array. */
565         pool->cache[cidx]->len = trunk->free - fetch_size - 1;
566         ts_idx += fetch_size;
567         for (i = 0; i < pool->cache[cidx]->len; i++)
568                 pool->cache[cidx]->idx[i] = ts_idx + i;
569         if (olc)
570                 pool->cfg.free(olc);
571         return ts_idx + i;
572 }
573
574 static void *
575 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
576 {
577         struct mlx5_indexed_trunk *trunk;
578         struct mlx5_indexed_cache *lc;
579         uint32_t trunk_idx;
580         uint32_t entry_idx;
581
582         MLX5_ASSERT(idx);
583         if (unlikely(!pool->cache[cidx])) {
584                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
585                         sizeof(struct mlx5_ipool_per_lcore) +
586                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
587                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
588                 if (!pool->cache[cidx]) {
589                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
590                         return NULL;
591                 }
592         }
593         lc = mlx5_ipool_update_global_cache(pool, cidx);
594         idx -= 1;
595         trunk_idx = mlx5_trunk_idx_get(pool, idx);
596         trunk = lc->trunks[trunk_idx];
597         MLX5_ASSERT(trunk);
598         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
599         return &trunk->data[entry_idx * pool->cfg.size];
600 }
601
602 static void *
603 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
604 {
605         void *entry;
606         int cidx;
607
608         cidx = rte_lcore_index(rte_lcore_id());
609         if (unlikely(cidx == -1)) {
610                 cidx = RTE_MAX_LCORE;
611                 rte_spinlock_lock(&pool->lcore_lock);
612         }
613         entry = _mlx5_ipool_get_cache(pool, cidx, idx);
614         if (unlikely(cidx == RTE_MAX_LCORE))
615                 rte_spinlock_unlock(&pool->lcore_lock);
616         return entry;
617 }
618
619
620 static void *
621 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
622                          uint32_t *idx)
623 {
624         if (unlikely(!pool->cache[cidx])) {
625                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
626                         sizeof(struct mlx5_ipool_per_lcore) +
627                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
628                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
629                 if (!pool->cache[cidx]) {
630                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
631                         return NULL;
632                 }
633         } else if (pool->cache[cidx]->len) {
634                 pool->cache[cidx]->len--;
635                 *idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
636                 return _mlx5_ipool_get_cache(pool, cidx, *idx);
637         }
638         /* Not enough idx in global cache. Keep fetching from global. */
639         *idx = mlx5_ipool_allocate_from_global(pool, cidx);
640         if (unlikely(!(*idx)))
641                 return NULL;
642         return _mlx5_ipool_get_cache(pool, cidx, *idx);
643 }
644
645 static void *
646 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
647 {
648         void *entry;
649         int cidx;
650
651         cidx = rte_lcore_index(rte_lcore_id());
652         if (unlikely(cidx == -1)) {
653                 cidx = RTE_MAX_LCORE;
654                 rte_spinlock_lock(&pool->lcore_lock);
655         }
656         entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
657         if (unlikely(cidx == RTE_MAX_LCORE))
658                 rte_spinlock_unlock(&pool->lcore_lock);
659         return entry;
660 }
661
662 static void
663 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
664 {
665         struct mlx5_ipool_per_lcore *ilc;
666         struct mlx5_indexed_cache *gc, *olc = NULL;
667         uint32_t reclaim_num = 0;
668
669         MLX5_ASSERT(idx);
670         /*
671          * When index was allocated on core A but freed on core B. In this
672          * case check if local cache on core B was allocated before.
673          */
674         if (unlikely(!pool->cache[cidx])) {
675                 pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
676                         sizeof(struct mlx5_ipool_per_lcore) +
677                         (pool->cfg.per_core_cache * sizeof(uint32_t)),
678                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
679                 if (!pool->cache[cidx]) {
680                         DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
681                         return;
682                 }
683         }
684         /* Try to enqueue to local index cache. */
685         if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
686                 pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
687                 pool->cache[cidx]->len++;
688                 return;
689         }
690         ilc = pool->cache[cidx];
691         reclaim_num = pool->cfg.per_core_cache >> 2;
692         ilc->len -= reclaim_num;
693         /* Local index cache full, try with global index cache. */
694         mlx5_ipool_lock(pool);
695         gc = pool->gc;
696         if (ilc->lc != gc) {
697                 if (!(--ilc->lc->ref_cnt))
698                         olc = ilc->lc;
699                 gc->ref_cnt++;
700                 ilc->lc = gc;
701         }
702         memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
703                reclaim_num * sizeof(uint32_t));
704         gc->len += reclaim_num;
705         mlx5_ipool_unlock(pool);
706         if (olc)
707                 pool->cfg.free(olc);
708         pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
709         pool->cache[cidx]->len++;
710 }
711
712 static void
713 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
714 {
715         int cidx;
716
717         cidx = rte_lcore_index(rte_lcore_id());
718         if (unlikely(cidx == -1)) {
719                 cidx = RTE_MAX_LCORE;
720                 rte_spinlock_lock(&pool->lcore_lock);
721         }
722         _mlx5_ipool_free_cache(pool, cidx, idx);
723         if (unlikely(cidx == RTE_MAX_LCORE))
724                 rte_spinlock_unlock(&pool->lcore_lock);
725 }
726
727 void *
728 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
729 {
730         struct mlx5_indexed_trunk *trunk;
731         uint64_t slab = 0;
732         uint32_t iidx = 0;
733         void *p;
734
735         if (pool->cfg.per_core_cache)
736                 return mlx5_ipool_malloc_cache(pool, idx);
737         mlx5_ipool_lock(pool);
738         if (pool->free_list == TRUNK_INVALID) {
739                 /* If no available trunks, grow new. */
740                 if (mlx5_ipool_grow(pool)) {
741                         mlx5_ipool_unlock(pool);
742                         return NULL;
743                 }
744         }
745         MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
746         trunk = pool->trunks[pool->free_list];
747         MLX5_ASSERT(trunk->free);
748         if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
749                 mlx5_ipool_unlock(pool);
750                 return NULL;
751         }
752         MLX5_ASSERT(slab);
753         iidx += __builtin_ctzll(slab);
754         MLX5_ASSERT(iidx != UINT32_MAX);
755         MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
756         rte_bitmap_clear(trunk->bmp, iidx);
757         p = &trunk->data[iidx * pool->cfg.size];
758         /*
759          * The ipool index should grow continually from small to big,
760          * some features as metering only accept limited bits of index.
761          * Random index with MSB set may be rejected.
762          */
763         iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
764         iidx += 1; /* non-zero index. */
765         trunk->free--;
766 #ifdef POOL_DEBUG
767         pool->n_entry++;
768 #endif
769         if (!trunk->free) {
770                 /* Full trunk will be removed from free list in imalloc. */
771                 MLX5_ASSERT(pool->free_list == trunk->idx);
772                 pool->free_list = trunk->next;
773                 if (trunk->next != TRUNK_INVALID)
774                         pool->trunks[trunk->next]->prev = TRUNK_INVALID;
775                 trunk->prev = TRUNK_INVALID;
776                 trunk->next = TRUNK_INVALID;
777 #ifdef POOL_DEBUG
778                 pool->trunk_empty++;
779                 pool->trunk_avail--;
780 #endif
781         }
782         *idx = iidx;
783         mlx5_ipool_unlock(pool);
784         return p;
785 }
786
787 void *
788 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
789 {
790         void *entry = mlx5_ipool_malloc(pool, idx);
791
792         if (entry && pool->cfg.size)
793                 memset(entry, 0, pool->cfg.size);
794         return entry;
795 }
796
797 void
798 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
799 {
800         struct mlx5_indexed_trunk *trunk;
801         uint32_t trunk_idx;
802         uint32_t entry_idx;
803
804         if (!idx)
805                 return;
806         if (pool->cfg.per_core_cache) {
807                 mlx5_ipool_free_cache(pool, idx);
808                 return;
809         }
810         idx -= 1;
811         mlx5_ipool_lock(pool);
812         trunk_idx = mlx5_trunk_idx_get(pool, idx);
813         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
814             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
815                 goto out;
816         trunk = pool->trunks[trunk_idx];
817         if (!trunk)
818                 goto out;
819         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
820         if (trunk_idx != trunk->idx ||
821             rte_bitmap_get(trunk->bmp, entry_idx))
822                 goto out;
823         rte_bitmap_set(trunk->bmp, entry_idx);
824         trunk->free++;
825         if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
826            (pool, trunk->idx)) {
827                 if (pool->free_list == trunk->idx)
828                         pool->free_list = trunk->next;
829                 if (trunk->next != TRUNK_INVALID)
830                         pool->trunks[trunk->next]->prev = trunk->prev;
831                 if (trunk->prev != TRUNK_INVALID)
832                         pool->trunks[trunk->prev]->next = trunk->next;
833                 pool->cfg.free(trunk);
834                 pool->trunks[trunk_idx] = NULL;
835                 pool->n_trunk_valid--;
836 #ifdef POOL_DEBUG
837                 pool->trunk_avail--;
838                 pool->trunk_free++;
839 #endif
840                 if (pool->n_trunk_valid == 0) {
841                         pool->cfg.free(pool->trunks);
842                         pool->trunks = NULL;
843                         pool->n_trunk = 0;
844                 }
845         } else if (trunk->free == 1) {
846                 /* Put into free trunk list head. */
847                 MLX5_ASSERT(pool->free_list != trunk->idx);
848                 trunk->next = pool->free_list;
849                 trunk->prev = TRUNK_INVALID;
850                 if (pool->free_list != TRUNK_INVALID)
851                         pool->trunks[pool->free_list]->prev = trunk->idx;
852                 pool->free_list = trunk->idx;
853 #ifdef POOL_DEBUG
854                 pool->trunk_empty--;
855                 pool->trunk_avail++;
856 #endif
857         }
858 #ifdef POOL_DEBUG
859         pool->n_entry--;
860 #endif
861 out:
862         mlx5_ipool_unlock(pool);
863 }
864
865 void *
866 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
867 {
868         struct mlx5_indexed_trunk *trunk;
869         void *p = NULL;
870         uint32_t trunk_idx;
871         uint32_t entry_idx;
872
873         if (!idx)
874                 return NULL;
875         if (pool->cfg.per_core_cache)
876                 return mlx5_ipool_get_cache(pool, idx);
877         idx -= 1;
878         mlx5_ipool_lock(pool);
879         trunk_idx = mlx5_trunk_idx_get(pool, idx);
880         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
881             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
882                 goto out;
883         trunk = pool->trunks[trunk_idx];
884         if (!trunk)
885                 goto out;
886         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
887         if (trunk_idx != trunk->idx ||
888             rte_bitmap_get(trunk->bmp, entry_idx))
889                 goto out;
890         p = &trunk->data[entry_idx * pool->cfg.size];
891 out:
892         mlx5_ipool_unlock(pool);
893         return p;
894 }
895
896 int
897 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
898 {
899         struct mlx5_indexed_trunk **trunks = NULL;
900         struct mlx5_indexed_cache *gc = pool->gc;
901         uint32_t i, n_trunk_valid = 0;
902
903         MLX5_ASSERT(pool);
904         mlx5_ipool_lock(pool);
905         if (pool->cfg.per_core_cache) {
906                 for (i = 0; i <= RTE_MAX_LCORE; i++) {
907                         /*
908                          * Free only old global cache. Pool gc will be
909                          * freed at last.
910                          */
911                         if (pool->cache[i]) {
912                                 if (pool->cache[i]->lc &&
913                                     pool->cache[i]->lc != pool->gc &&
914                                     (!(--pool->cache[i]->lc->ref_cnt)))
915                                         pool->cfg.free(pool->cache[i]->lc);
916                                 pool->cfg.free(pool->cache[i]);
917                         }
918                 }
919                 if (gc) {
920                         trunks = gc->trunks;
921                         n_trunk_valid = gc->n_trunk_valid;
922                 }
923         } else {
924                 gc = NULL;
925                 trunks = pool->trunks;
926                 n_trunk_valid = pool->n_trunk_valid;
927         }
928         for (i = 0; i < n_trunk_valid; i++) {
929                 if (trunks[i])
930                         pool->cfg.free(trunks[i]);
931         }
932         if (!gc && trunks)
933                 pool->cfg.free(trunks);
934         if (gc)
935                 pool->cfg.free(gc);
936         mlx5_ipool_unlock(pool);
937         mlx5_free(pool);
938         return 0;
939 }
940
941 void
942 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
943 {
944         uint32_t i, j;
945         struct mlx5_indexed_cache *gc;
946         struct rte_bitmap *ibmp;
947         uint32_t bmp_num, mem_size;
948
949         if (!pool->cfg.per_core_cache)
950                 return;
951         gc = pool->gc;
952         if (!gc)
953                 return;
954         /* Reset bmp. */
955         bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
956         mem_size = rte_bitmap_get_memory_footprint(bmp_num);
957         pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
958                                          RTE_CACHE_LINE_SIZE, rte_socket_id());
959         if (!pool->bmp_mem) {
960                 DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
961                 return;
962         }
963         ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
964         if (!ibmp) {
965                 pool->cfg.free(pool->bmp_mem);
966                 pool->bmp_mem = NULL;
967                 DRV_LOG(ERR, "Ipool bitmap create failed.\n");
968                 return;
969         }
970         pool->ibmp = ibmp;
971         /* Clear global cache. */
972         for (i = 0; i < gc->len; i++)
973                 rte_bitmap_clear(ibmp, gc->idx[i] - 1);
974         /* Clear core cache. */
975         for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
976                 struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
977
978                 if (!ilc)
979                         continue;
980                 for (j = 0; j < ilc->len; j++)
981                         rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
982         }
983 }
984
985 static void *
986 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
987 {
988         struct rte_bitmap *ibmp;
989         uint64_t slab = 0;
990         uint32_t iidx = *pos;
991
992         ibmp = pool->ibmp;
993         if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
994                 if (pool->bmp_mem) {
995                         pool->cfg.free(pool->bmp_mem);
996                         pool->bmp_mem = NULL;
997                         pool->ibmp = NULL;
998                 }
999                 return NULL;
1000         }
1001         iidx += __builtin_ctzll(slab);
1002         rte_bitmap_clear(ibmp, iidx);
1003         iidx++;
1004         *pos = iidx;
1005         return mlx5_ipool_get_cache(pool, iidx);
1006 }
1007
1008 void *
1009 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
1010 {
1011         uint32_t idx = *pos;
1012         void *entry;
1013
1014         if (pool->cfg.per_core_cache)
1015                 return mlx5_ipool_get_next_cache(pool, pos);
1016         while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
1017                 entry = mlx5_ipool_get(pool, idx);
1018                 if (entry) {
1019                         *pos = idx;
1020                         return entry;
1021                 }
1022                 idx++;
1023         }
1024         return NULL;
1025 }
1026
1027 void
1028 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
1029 {
1030         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
1031                "total: %d\n",
1032                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
1033                pool->cfg.trunk_size, pool->n_trunk_valid);
1034 #ifdef POOL_DEBUG
1035         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
1036                "available %u free %u\n",
1037                pool->cfg.type, pool->n_entry, pool->trunk_new,
1038                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
1039 #endif
1040 }
1041
1042 struct mlx5_l3t_tbl *
1043 mlx5_l3t_create(enum mlx5_l3t_type type)
1044 {
1045         struct mlx5_l3t_tbl *tbl;
1046         struct mlx5_indexed_pool_config l3t_ip_cfg = {
1047                 .trunk_size = 16,
1048                 .grow_trunk = 6,
1049                 .grow_shift = 1,
1050                 .need_lock = 0,
1051                 .release_mem_en = 1,
1052                 .malloc = mlx5_malloc,
1053                 .free = mlx5_free,
1054         };
1055
1056         if (type >= MLX5_L3T_TYPE_MAX) {
1057                 rte_errno = EINVAL;
1058                 return NULL;
1059         }
1060         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
1061                           SOCKET_ID_ANY);
1062         if (!tbl) {
1063                 rte_errno = ENOMEM;
1064                 return NULL;
1065         }
1066         tbl->type = type;
1067         switch (type) {
1068         case MLX5_L3T_TYPE_WORD:
1069                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
1070                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
1071                 break;
1072         case MLX5_L3T_TYPE_DWORD:
1073                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
1074                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
1075                 break;
1076         case MLX5_L3T_TYPE_QWORD:
1077                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
1078                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
1079                 break;
1080         default:
1081                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
1082                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
1083                 break;
1084         }
1085         rte_spinlock_init(&tbl->sl);
1086         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
1087         if (!tbl->eip) {
1088                 rte_errno = ENOMEM;
1089                 mlx5_free(tbl);
1090                 tbl = NULL;
1091         }
1092         return tbl;
1093 }
1094
1095 void
1096 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
1097 {
1098         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1099         uint32_t i, j;
1100
1101         if (!tbl)
1102                 return;
1103         g_tbl = tbl->tbl;
1104         if (g_tbl) {
1105                 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
1106                         m_tbl = g_tbl->tbl[i];
1107                         if (!m_tbl)
1108                                 continue;
1109                         for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
1110                                 if (!m_tbl->tbl[j])
1111                                         continue;
1112                                 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
1113                                             m_tbl->tbl[j])->ref_cnt);
1114                                 mlx5_ipool_free(tbl->eip,
1115                                                 ((struct mlx5_l3t_entry_word *)
1116                                                 m_tbl->tbl[j])->idx);
1117                                 m_tbl->tbl[j] = 0;
1118                                 if (!(--m_tbl->ref_cnt))
1119                                         break;
1120                         }
1121                         MLX5_ASSERT(!m_tbl->ref_cnt);
1122                         mlx5_free(g_tbl->tbl[i]);
1123                         g_tbl->tbl[i] = 0;
1124                         if (!(--g_tbl->ref_cnt))
1125                                 break;
1126                 }
1127                 MLX5_ASSERT(!g_tbl->ref_cnt);
1128                 mlx5_free(tbl->tbl);
1129                 tbl->tbl = 0;
1130         }
1131         mlx5_ipool_destroy(tbl->eip);
1132         mlx5_free(tbl);
1133 }
1134
1135 static int32_t
1136 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1137                 union mlx5_l3t_data *data)
1138 {
1139         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1140         struct mlx5_l3t_entry_word *w_e_tbl;
1141         struct mlx5_l3t_entry_dword *dw_e_tbl;
1142         struct mlx5_l3t_entry_qword *qw_e_tbl;
1143         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1144         void *e_tbl;
1145         uint32_t entry_idx;
1146
1147         g_tbl = tbl->tbl;
1148         if (!g_tbl)
1149                 return -1;
1150         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1151         if (!m_tbl)
1152                 return -1;
1153         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1154         if (!e_tbl)
1155                 return -1;
1156         entry_idx = idx & MLX5_L3T_ET_MASK;
1157         switch (tbl->type) {
1158         case MLX5_L3T_TYPE_WORD:
1159                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1160                 data->word = w_e_tbl->entry[entry_idx].data;
1161                 if (w_e_tbl->entry[entry_idx].data)
1162                         w_e_tbl->entry[entry_idx].ref_cnt++;
1163                 break;
1164         case MLX5_L3T_TYPE_DWORD:
1165                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1166                 data->dword = dw_e_tbl->entry[entry_idx].data;
1167                 if (dw_e_tbl->entry[entry_idx].data)
1168                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1169                 break;
1170         case MLX5_L3T_TYPE_QWORD:
1171                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1172                 data->qword = qw_e_tbl->entry[entry_idx].data;
1173                 if (qw_e_tbl->entry[entry_idx].data)
1174                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1175                 break;
1176         default:
1177                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1178                 data->ptr = ptr_e_tbl->entry[entry_idx].data;
1179                 if (ptr_e_tbl->entry[entry_idx].data)
1180                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1181                 break;
1182         }
1183         return 0;
1184 }
1185
1186 int32_t
1187 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1188                    union mlx5_l3t_data *data)
1189 {
1190         int ret;
1191
1192         rte_spinlock_lock(&tbl->sl);
1193         ret = __l3t_get_entry(tbl, idx, data);
1194         rte_spinlock_unlock(&tbl->sl);
1195         return ret;
1196 }
1197
1198 int32_t
1199 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1200 {
1201         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1202         struct mlx5_l3t_entry_word *w_e_tbl;
1203         struct mlx5_l3t_entry_dword *dw_e_tbl;
1204         struct mlx5_l3t_entry_qword *qw_e_tbl;
1205         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1206         void *e_tbl;
1207         uint32_t entry_idx;
1208         uint64_t ref_cnt;
1209         int32_t ret = -1;
1210
1211         rte_spinlock_lock(&tbl->sl);
1212         g_tbl = tbl->tbl;
1213         if (!g_tbl)
1214                 goto out;
1215         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1216         if (!m_tbl)
1217                 goto out;
1218         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1219         if (!e_tbl)
1220                 goto out;
1221         entry_idx = idx & MLX5_L3T_ET_MASK;
1222         switch (tbl->type) {
1223         case MLX5_L3T_TYPE_WORD:
1224                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1225                 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1226                 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1227                 if (ret)
1228                         goto out;
1229                 w_e_tbl->entry[entry_idx].data = 0;
1230                 ref_cnt = --w_e_tbl->ref_cnt;
1231                 break;
1232         case MLX5_L3T_TYPE_DWORD:
1233                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1234                 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1235                 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1236                 if (ret)
1237                         goto out;
1238                 dw_e_tbl->entry[entry_idx].data = 0;
1239                 ref_cnt = --dw_e_tbl->ref_cnt;
1240                 break;
1241         case MLX5_L3T_TYPE_QWORD:
1242                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1243                 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1244                 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1245                 if (ret)
1246                         goto out;
1247                 qw_e_tbl->entry[entry_idx].data = 0;
1248                 ref_cnt = --qw_e_tbl->ref_cnt;
1249                 break;
1250         default:
1251                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1252                 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1253                 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1254                 if (ret)
1255                         goto out;
1256                 ptr_e_tbl->entry[entry_idx].data = NULL;
1257                 ref_cnt = --ptr_e_tbl->ref_cnt;
1258                 break;
1259         }
1260         if (!ref_cnt) {
1261                 mlx5_ipool_free(tbl->eip,
1262                                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1263                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1264                                                                         NULL;
1265                 if (!(--m_tbl->ref_cnt)) {
1266                         mlx5_free(m_tbl);
1267                         g_tbl->tbl
1268                         [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1269                         if (!(--g_tbl->ref_cnt)) {
1270                                 mlx5_free(g_tbl);
1271                                 tbl->tbl = 0;
1272                         }
1273                 }
1274         }
1275 out:
1276         rte_spinlock_unlock(&tbl->sl);
1277         return ret;
1278 }
1279
1280 static int32_t
1281 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1282                 union mlx5_l3t_data *data)
1283 {
1284         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1285         struct mlx5_l3t_entry_word *w_e_tbl;
1286         struct mlx5_l3t_entry_dword *dw_e_tbl;
1287         struct mlx5_l3t_entry_qword *qw_e_tbl;
1288         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1289         void *e_tbl;
1290         uint32_t entry_idx, tbl_idx = 0;
1291
1292         /* Check the global table, create it if empty. */
1293         g_tbl = tbl->tbl;
1294         if (!g_tbl) {
1295                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1296                                     sizeof(struct mlx5_l3t_level_tbl) +
1297                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1298                                     SOCKET_ID_ANY);
1299                 if (!g_tbl) {
1300                         rte_errno = ENOMEM;
1301                         return -1;
1302                 }
1303                 tbl->tbl = g_tbl;
1304         }
1305         /*
1306          * Check the middle table, create it if empty. Ref_cnt will be
1307          * increased if new sub table created.
1308          */
1309         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1310         if (!m_tbl) {
1311                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1312                                     sizeof(struct mlx5_l3t_level_tbl) +
1313                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1314                                     SOCKET_ID_ANY);
1315                 if (!m_tbl) {
1316                         rte_errno = ENOMEM;
1317                         return -1;
1318                 }
1319                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1320                                                                         m_tbl;
1321                 g_tbl->ref_cnt++;
1322         }
1323         /*
1324          * Check the entry table, create it if empty. Ref_cnt will be
1325          * increased if new sub entry table created.
1326          */
1327         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1328         if (!e_tbl) {
1329                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1330                 if (!e_tbl) {
1331                         rte_errno = ENOMEM;
1332                         return -1;
1333                 }
1334                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1335                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1336                                                                         e_tbl;
1337                 m_tbl->ref_cnt++;
1338         }
1339         entry_idx = idx & MLX5_L3T_ET_MASK;
1340         switch (tbl->type) {
1341         case MLX5_L3T_TYPE_WORD:
1342                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1343                 if (w_e_tbl->entry[entry_idx].data) {
1344                         data->word = w_e_tbl->entry[entry_idx].data;
1345                         w_e_tbl->entry[entry_idx].ref_cnt++;
1346                         rte_errno = EEXIST;
1347                         return -1;
1348                 }
1349                 w_e_tbl->entry[entry_idx].data = data->word;
1350                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
1351                 w_e_tbl->ref_cnt++;
1352                 break;
1353         case MLX5_L3T_TYPE_DWORD:
1354                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1355                 if (dw_e_tbl->entry[entry_idx].data) {
1356                         data->dword = dw_e_tbl->entry[entry_idx].data;
1357                         dw_e_tbl->entry[entry_idx].ref_cnt++;
1358                         rte_errno = EEXIST;
1359                         return -1;
1360                 }
1361                 dw_e_tbl->entry[entry_idx].data = data->dword;
1362                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1363                 dw_e_tbl->ref_cnt++;
1364                 break;
1365         case MLX5_L3T_TYPE_QWORD:
1366                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1367                 if (qw_e_tbl->entry[entry_idx].data) {
1368                         data->qword = qw_e_tbl->entry[entry_idx].data;
1369                         qw_e_tbl->entry[entry_idx].ref_cnt++;
1370                         rte_errno = EEXIST;
1371                         return -1;
1372                 }
1373                 qw_e_tbl->entry[entry_idx].data = data->qword;
1374                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1375                 qw_e_tbl->ref_cnt++;
1376                 break;
1377         default:
1378                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1379                 if (ptr_e_tbl->entry[entry_idx].data) {
1380                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
1381                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
1382                         rte_errno = EEXIST;
1383                         return -1;
1384                 }
1385                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
1386                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1387                 ptr_e_tbl->ref_cnt++;
1388                 break;
1389         }
1390         return 0;
1391 }
1392
1393 int32_t
1394 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1395                    union mlx5_l3t_data *data)
1396 {
1397         int ret;
1398
1399         rte_spinlock_lock(&tbl->sl);
1400         ret = __l3t_set_entry(tbl, idx, data);
1401         rte_spinlock_unlock(&tbl->sl);
1402         return ret;
1403 }
1404
1405 int32_t
1406 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1407                        union mlx5_l3t_data *data,
1408                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
1409 {
1410         int32_t ret;
1411
1412         rte_spinlock_lock(&tbl->sl);
1413         /* Check if entry data is ready. */
1414         ret = __l3t_get_entry(tbl, idx, data);
1415         if (!ret) {
1416                 switch (tbl->type) {
1417                 case MLX5_L3T_TYPE_WORD:
1418                         if (data->word)
1419                                 goto out;
1420                         break;
1421                 case MLX5_L3T_TYPE_DWORD:
1422                         if (data->dword)
1423                                 goto out;
1424                         break;
1425                 case MLX5_L3T_TYPE_QWORD:
1426                         if (data->qword)
1427                                 goto out;
1428                         break;
1429                 default:
1430                         if (data->ptr)
1431                                 goto out;
1432                         break;
1433                 }
1434         }
1435         /* Entry data is not ready, use user callback to create it. */
1436         ret = cb(ctx, data);
1437         if (ret)
1438                 goto out;
1439         /* Save the new allocated data to entry. */
1440         ret = __l3t_set_entry(tbl, idx, data);
1441 out:
1442         rte_spinlock_unlock(&tbl->sl);
1443         return ret;
1444 }