net/mlx5: support concurrent access for hash list
[dpdk.git] / drivers/net/mlx5/mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6 #include <rte_hash_crc.h>
7
8 #include <mlx5_malloc.h>
9
10 #include "mlx5_utils.h"
11
12 /********************* Hash List **********************/
13
14 static struct mlx5_hlist_entry *
15 mlx5_hlist_default_create_cb(struct mlx5_hlist *h, uint64_t key __rte_unused,
16                              void *ctx __rte_unused)
17 {
18         return mlx5_malloc(MLX5_MEM_ZERO, h->entry_sz, 0, SOCKET_ID_ANY);
19 }
20
21 static void
22 mlx5_hlist_default_remove_cb(struct mlx5_hlist *h __rte_unused,
23                              struct mlx5_hlist_entry *entry)
24 {
25         mlx5_free(entry);
26 }
27
28 static int
29 mlx5_hlist_default_match_cb(struct mlx5_hlist *h __rte_unused,
30                             struct mlx5_hlist_entry *entry,
31                             uint64_t key, void *ctx __rte_unused)
32 {
33         return entry->key != key;
34 }
35
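/*
 * Illustrative sketch, not part of the original file: a hash list user
 * normally embeds struct mlx5_hlist_entry at the beginning of its own
 * structure and supplies create/match/remove callbacks with the same
 * signatures as the defaults above. The names below (struct tag_entry,
 * tag_*_cb) are hypothetical.
 *
 *	struct tag_entry {
 *		struct mlx5_hlist_entry entry;	(must be the first member)
 *		uint32_t id;
 *	};
 *
 *	static struct mlx5_hlist_entry *
 *	tag_create_cb(struct mlx5_hlist *h __rte_unused, uint64_t key,
 *		      void *ctx __rte_unused)
 *	{
 *		struct tag_entry *te = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*te),
 *						   0, SOCKET_ID_ANY);
 *
 *		if (te)
 *			te->id = (uint32_t)key;
 *		return te ? &te->entry : NULL;
 *	}
 *
 *	static int
 *	tag_match_cb(struct mlx5_hlist *h __rte_unused,
 *		     struct mlx5_hlist_entry *entry, uint64_t key,
 *		     void *ctx __rte_unused)
 *	{
 *		return ((struct tag_entry *)entry)->id != (uint32_t)key;
 *	}
 *
 *	static void
 *	tag_remove_cb(struct mlx5_hlist *h __rte_unused,
 *		      struct mlx5_hlist_entry *entry)
 *	{
 *		mlx5_free(entry);
 *	}
 */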
36 struct mlx5_hlist *
37 mlx5_hlist_create(const char *name, uint32_t size, uint32_t entry_size,
38                   uint32_t flags, mlx5_hlist_create_cb cb_create,
39                   mlx5_hlist_match_cb cb_match, mlx5_hlist_remove_cb cb_remove)
40 {
41         struct mlx5_hlist *h;
42         uint32_t act_size;
43         uint32_t alloc_size;
44
45         if (!size || (!cb_create ^ !cb_remove))
46                 return NULL;
47         /* Align to the next power of 2; a 32-bit integer is enough for now. */
48         if (!rte_is_power_of_2(size)) {
49                 act_size = rte_align32pow2(size);
50                 DRV_LOG(WARNING, "Size 0x%" PRIX32 " is not power of 2, will "
51                         "be aligned to 0x%" PRIX32 ".", size, act_size);
52         } else {
53                 act_size = size;
54         }
55         alloc_size = sizeof(struct mlx5_hlist) +
56                      sizeof(struct mlx5_hlist_head) * act_size;
57         /* Zeroed allocation, so the list heads need no explicit initialization. */
58         h = mlx5_malloc(MLX5_MEM_ZERO, alloc_size, RTE_CACHE_LINE_SIZE,
59                         SOCKET_ID_ANY);
60         if (!h) {
61                 DRV_LOG(ERR, "No memory for hash list %s creation",
62                         name ? name : "None");
63                 return NULL;
64         }
65         if (name)
66                 snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
67         h->table_sz = act_size;
68         h->mask = act_size - 1;
69         h->entry_sz = entry_size;
70         h->direct_key = !!(flags & MLX5_HLIST_DIRECT_KEY);
71         h->write_most = !!(flags & MLX5_HLIST_WRITE_MOST);
72         h->cb_create = cb_create ? cb_create : mlx5_hlist_default_create_cb;
73         h->cb_match = cb_match ? cb_match : mlx5_hlist_default_match_cb;
74         h->cb_remove = cb_remove ? cb_remove : mlx5_hlist_default_remove_cb;
75         rte_rwlock_init(&h->lock);
76         DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX32 " is created.",
77                 h->name, act_size);
78         return h;
79 }
80
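/*
 * Usage sketch, not part of the original file: creating a hash list with
 * the default callbacks, in which case entry_size is the size allocated by
 * the default create callback. The name "sample" and the size 64 are
 * arbitrary examples.
 *
 *	struct mlx5_hlist *h;
 *
 *	h = mlx5_hlist_create("sample", 64, sizeof(struct mlx5_hlist_entry),
 *			      0, NULL, NULL, NULL);
 *	if (h == NULL)
 *		return -ENOMEM;
 */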
81 static struct mlx5_hlist_entry *
82 __hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
83 {
84         uint32_t idx;
85         struct mlx5_hlist_head *first;
86         struct mlx5_hlist_entry *node;
87
88         MLX5_ASSERT(h);
89         if (h->direct_key)
90                 idx = (uint32_t)(key & h->mask);
91         else
92                 idx = rte_hash_crc_8byte(key, 0) & h->mask;
93         first = &h->heads[idx];
94         LIST_FOREACH(node, first, next) {
95                 if (!h->cb_match(h, node, key, ctx)) {
96                         if (reuse) {
97                                 __atomic_add_fetch(&node->ref_cnt, 1,
98                                                    __ATOMIC_RELAXED);
99                                 DRV_LOG(DEBUG, "Hash list %s entry %p "
100                                         "reuse: %u.",
101                                         h->name, (void *)node, node->ref_cnt);
102                         }
103                         break;
104                 }
105         }
106         return node;
107 }
108
109 static struct mlx5_hlist_entry *
110 hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx, bool reuse)
111 {
112         struct mlx5_hlist_entry *node;
113
114         MLX5_ASSERT(h);
115         rte_rwlock_read_lock(&h->lock);
116         node = __hlist_lookup(h, key, ctx, reuse);
117         rte_rwlock_read_unlock(&h->lock);
118         return node;
119 }
120
121 struct mlx5_hlist_entry *
122 mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx)
123 {
124         return hlist_lookup(h, key, ctx, false);
125 }
126
127 struct mlx5_hlist_entry*
128 mlx5_hlist_register(struct mlx5_hlist *h, uint64_t key, void *ctx)
129 {
130         uint32_t idx;
131         struct mlx5_hlist_head *first;
132         struct mlx5_hlist_entry *entry;
133         uint32_t prev_gen_cnt = 0;
134
135         MLX5_ASSERT(h);
136         /* Use write lock directly for write-most list. */
137         if (!h->write_most) {
138                 prev_gen_cnt = __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE);
139                 entry = hlist_lookup(h, key, ctx, true);
140                 if (entry)
141                         return entry;
142         }
143         rte_rwlock_write_lock(&h->lock);
144         /* Check if the list was changed by another thread. */
145         if (h->write_most ||
146             prev_gen_cnt != __atomic_load_n(&h->gen_cnt, __ATOMIC_ACQUIRE)) {
147                 entry = __hlist_lookup(h, key, ctx, true);
148                 if (entry)
149                         goto done;
150         }
151         if (h->direct_key)
152                 idx = (uint32_t)(key & h->mask);
153         else
154                 idx = rte_hash_crc_8byte(key, 0) & h->mask;
155         first = &h->heads[idx];
156         entry = h->cb_create(h, key, ctx);
157         if (!entry) {
158                 rte_errno = ENOMEM;
159                 DRV_LOG(ERR, "Can't allocate hash list %s entry.", h->name);
160                 goto done;
161         }
162         entry->key = key;
163         entry->ref_cnt = 1;
164         LIST_INSERT_HEAD(first, entry, next);
165         __atomic_add_fetch(&h->gen_cnt, 1, __ATOMIC_ACQ_REL);
166         DRV_LOG(DEBUG, "Hash list %s entry %p new: %u.",
167                 h->name, (void *)entry, entry->ref_cnt);
168 done:
169         rte_rwlock_write_unlock(&h->lock);
170         return entry;
171 }
172
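/*
 * Usage sketch, not part of the original file: taking and releasing a
 * reference-counted entry. mlx5_hlist_register() either reuses a matching
 * entry (incrementing ref_cnt) or creates a new one through cb_create under
 * the write lock; mlx5_hlist_unregister() drops the reference and removes
 * the entry once the count reaches zero. The key value is arbitrary.
 *
 *	struct mlx5_hlist_entry *e;
 *
 *	e = mlx5_hlist_register(h, 0x1234, NULL);
 *	if (e == NULL)
 *		return -rte_errno;
 *	(use the entry, possibly from several threads)
 *	mlx5_hlist_unregister(h, e);
 */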
173 struct mlx5_hlist_entry *
174 mlx5_hlist_lookup_ex(struct mlx5_hlist *h, uint64_t key,
175                      mlx5_hlist_match_callback_fn cb, void *ctx)
176 {
177         uint32_t idx;
178         struct mlx5_hlist_head *first;
179         struct mlx5_hlist_entry *node;
180
181         MLX5_ASSERT(h && cb && ctx);
182         idx = rte_hash_crc_8byte(key, 0) & h->mask;
183         first = &h->heads[idx];
184         LIST_FOREACH(node, first, next) {
185                 if (!cb(node, ctx))
186                         return node;
187         }
188         return NULL;
189 }
190
191 int
192 mlx5_hlist_insert_ex(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry,
193                      mlx5_hlist_match_callback_fn cb, void *ctx)
194 {
195         uint32_t idx;
196         struct mlx5_hlist_head *first;
197         struct mlx5_hlist_entry *node;
198
199         MLX5_ASSERT(h && entry && cb && ctx);
200         idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
201         first = &h->heads[idx];
202         /* No need to reuse the lookup function. */
203         LIST_FOREACH(node, first, next) {
204                 if (!cb(node, ctx))
205                         return -EEXIST;
206         }
207         LIST_INSERT_HEAD(first, entry, next);
208         return 0;
209 }
210
211 int
212 mlx5_hlist_unregister(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
213 {
214         rte_rwlock_write_lock(&h->lock);
215         MLX5_ASSERT(entry && entry->ref_cnt && entry->next.le_prev);
216         DRV_LOG(DEBUG, "Hash list %s entry %p deref: %u.",
217                 h->name, (void *)entry, entry->ref_cnt);
218         if (--entry->ref_cnt) {
219                 rte_rwlock_write_unlock(&h->lock);
220                 return 1;
221         }
222         LIST_REMOVE(entry, next);
223         /* Set to NULL to prevent the entry from being removed more than once. */
224         entry->next.le_prev = NULL;
225         h->cb_remove(h, entry);
226         rte_rwlock_write_unlock(&h->lock);
227         DRV_LOG(DEBUG, "Hash list %s entry %p removed.",
228                 h->name, (void *)entry);
229         return 0;
230 }
231
232 void
233 mlx5_hlist_destroy(struct mlx5_hlist *h)
234 {
235         uint32_t idx;
236         struct mlx5_hlist_entry *entry;
237
238         MLX5_ASSERT(h);
239         for (idx = 0; idx < h->table_sz; ++idx) {
240                 /* No LIST_FOREACH_SAFE, using while instead. */
241                 while (!LIST_EMPTY(&h->heads[idx])) {
242                         entry = LIST_FIRST(&h->heads[idx]);
243                         LIST_REMOVE(entry, next);
244                         /*
245                          * The user owns the whole element that contains the
246                          * data entry, so it is the user's duty to clean up
247                          * and free it, since the hlist entry may not be
248                          * placed at the beginning of the element (placing it
249                          * at the beginning is suggested). Otherwise the
250                          * default remove callback is used.
251                          */
252                         h->cb_remove(h, entry);
253                 }
254         }
255         mlx5_free(h);
256 }
257
258 /********************* Indexed pool **********************/
259
260 static inline void
261 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
262 {
263         if (pool->cfg.need_lock)
264                 rte_spinlock_lock(&pool->lock);
265 }
266
267 static inline void
268 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
269 {
270         if (pool->cfg.need_lock)
271                 rte_spinlock_unlock(&pool->lock);
272 }
273
274 static inline uint32_t
275 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
276 {
277         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
278         uint32_t trunk_idx = 0;
279         uint32_t i;
280
281         if (!cfg->grow_trunk)
282                 return entry_idx / cfg->trunk_size;
283         if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
284                 trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
285                             (cfg->trunk_size << (cfg->grow_shift *
286                             cfg->grow_trunk)) + cfg->grow_trunk;
287         } else {
288                 for (i = 0; i < cfg->grow_trunk; i++) {
289                         if (entry_idx < pool->grow_tbl[i])
290                                 break;
291                 }
292                 trunk_idx = i;
293         }
294         return trunk_idx;
295 }
296
297 static inline uint32_t
298 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
299 {
300         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
301
302         return cfg->trunk_size << (cfg->grow_shift *
303                (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
304 }
305
306 static inline uint32_t
307 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
308 {
309         struct mlx5_indexed_pool_config *cfg = &pool->cfg;
310         uint32_t offset = 0;
311
312         if (!trunk_idx)
313                 return 0;
314         if (!cfg->grow_trunk)
315                 return cfg->trunk_size * trunk_idx;
316         if (trunk_idx < cfg->grow_trunk)
317                 offset = pool->grow_tbl[trunk_idx - 1];
318         else
319                 offset = pool->grow_tbl[cfg->grow_trunk - 1] +
320                          (cfg->trunk_size << (cfg->grow_shift *
321                          cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
322         return offset;
323 }
324
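/*
 * Worked example of the grow-trunk arithmetic above, using the configuration
 * that mlx5_l3t_create() passes later in this file (trunk_size = 16,
 * grow_shift = 1, grow_trunk = 6):
 *
 *	trunk index:   0    1    2    3    4    5    6 and later
 *	trunk size:   16   32   64  128  256  512   1024 each
 *	grow_tbl[]:   16   48  112  240  496 1008   (cumulative entries)
 *
 * Entry index 100 therefore falls into trunk 2 (48 <= 100 < 112) at offset
 * 100 - 48 = 52, and entry index 2000 into trunk (2000 - 1008) / 1024 + 6 = 6.
 */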
325 struct mlx5_indexed_pool *
326 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
327 {
328         struct mlx5_indexed_pool *pool;
329         uint32_t i;
330
331         if (!cfg || (!cfg->malloc ^ !cfg->free) ||
332             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
333             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
334                 return NULL;
335         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
336                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
337                            SOCKET_ID_ANY);
338         if (!pool)
339                 return NULL;
340         pool->cfg = *cfg;
341         if (!pool->cfg.trunk_size)
342                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
343         if (!cfg->malloc && !cfg->free) {
344                 pool->cfg.malloc = mlx5_malloc;
345                 pool->cfg.free = mlx5_free;
346         }
347         pool->free_list = TRUNK_INVALID;
348         if (pool->cfg.need_lock)
349                 rte_spinlock_init(&pool->lock);
350         /*
351          * Initialize the dynamic-grow trunk size lookup table so the trunk
352          * entry index offset can be looked up quickly.
353          */
354         for (i = 0; i < cfg->grow_trunk; i++) {
355                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
356                 if (i > 0)
357                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
358         }
359         return pool;
360 }
361
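/*
 * Illustrative sketch, not part of the original file: a possible pool
 * configuration. The values and the object type are examples only;
 * trunk_size must be a power of two (or 0 to get
 * MLX5_IPOOL_DEFAULT_TRUNK_SIZE) and malloc/free must be set either both
 * or neither.
 *
 *	struct mlx5_indexed_pool_config cfg = {
 *		.size = sizeof(struct my_obj),       (my_obj is hypothetical)
 *		.trunk_size = 64,
 *		.need_lock = 1,
 *		.release_mem_en = 1,
 *		.malloc = mlx5_malloc,
 *		.free = mlx5_free,
 *		.type = "my_obj_ipool",
 *	};
 *	struct mlx5_indexed_pool *pool = mlx5_ipool_create(&cfg);
 */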
362 static int
363 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
364 {
365         struct mlx5_indexed_trunk *trunk;
366         struct mlx5_indexed_trunk **trunk_tmp;
367         struct mlx5_indexed_trunk **p;
368         size_t trunk_size = 0;
369         size_t data_size;
370         size_t bmp_size;
371         uint32_t idx;
372
373         if (pool->n_trunk_valid == TRUNK_MAX_IDX)
374                 return -ENOMEM;
375         if (pool->n_trunk_valid == pool->n_trunk) {
376                 /* No free trunk slots left, expand the trunk list. */
377                 int n_grow = pool->n_trunk_valid ? pool->n_trunk :
378                              RTE_CACHE_LINE_SIZE / sizeof(void *);
379
380                 p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
381                                      sizeof(struct mlx5_indexed_trunk *),
382                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
383                 if (!p)
384                         return -ENOMEM;
385                 if (pool->trunks)
386                         memcpy(p, pool->trunks, pool->n_trunk_valid *
387                                sizeof(struct mlx5_indexed_trunk *));
388                 memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
389                        n_grow * sizeof(void *));
390                 trunk_tmp = pool->trunks;
391                 pool->trunks = p;
392                 if (trunk_tmp)
393                         pool->cfg.free(trunk_tmp);
394                 pool->n_trunk += n_grow;
395         }
396         if (!pool->cfg.release_mem_en) {
397                 idx = pool->n_trunk_valid;
398         } else {
399                 /* Find the first available slot in the trunk list. */
400                 for (idx = 0; idx < pool->n_trunk; idx++)
401                         if (pool->trunks[idx] == NULL)
402                                 break;
403         }
404         trunk_size += sizeof(*trunk);
405         data_size = mlx5_trunk_size_get(pool, idx);
406         bmp_size = rte_bitmap_get_memory_footprint(data_size);
407         /* rte_bitmap requires the memory to be cache-line aligned. */
408         trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
409         trunk_size += bmp_size;
410         trunk = pool->cfg.malloc(0, trunk_size,
411                                  RTE_CACHE_LINE_SIZE, rte_socket_id());
412         if (!trunk)
413                 return -ENOMEM;
414         pool->trunks[idx] = trunk;
415         trunk->idx = idx;
416         trunk->free = data_size;
417         trunk->prev = TRUNK_INVALID;
418         trunk->next = TRUNK_INVALID;
419         MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
420         pool->free_list = idx;
421         /* Mark all entries as available. */
422         trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
423                      [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
424                      bmp_size);
425         MLX5_ASSERT(trunk->bmp);
426         pool->n_trunk_valid++;
427 #ifdef POOL_DEBUG
428         pool->trunk_new++;
429         pool->trunk_avail++;
430 #endif
431         return 0;
432 }
433
434 void *
435 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
436 {
437         struct mlx5_indexed_trunk *trunk;
438         uint64_t slab = 0;
439         uint32_t iidx = 0;
440         void *p;
441
442         mlx5_ipool_lock(pool);
443         if (pool->free_list == TRUNK_INVALID) {
444                 /* If no trunk is available, grow a new one. */
445                 if (mlx5_ipool_grow(pool)) {
446                         mlx5_ipool_unlock(pool);
447                         return NULL;
448                 }
449         }
450         MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
451         trunk = pool->trunks[pool->free_list];
452         MLX5_ASSERT(trunk->free);
453         if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
454                 mlx5_ipool_unlock(pool);
455                 return NULL;
456         }
457         MLX5_ASSERT(slab);
458         iidx += __builtin_ctzll(slab);
459         MLX5_ASSERT(iidx != UINT32_MAX);
460         MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
461         rte_bitmap_clear(trunk->bmp, iidx);
462         p = &trunk->data[iidx * pool->cfg.size];
463         /*
464          * The ipool index should grow continuously from small to big;
465          * some features, such as metering, only accept a limited number
466          * of index bits. A random index with the MSB set may be rejected.
467          */
468         iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
469         iidx += 1; /* non-zero index. */
470         trunk->free--;
471 #ifdef POOL_DEBUG
472         pool->n_entry++;
473 #endif
474         if (!trunk->free) {
475                 /* A full trunk is removed from the free trunk list here. */
476                 MLX5_ASSERT(pool->free_list == trunk->idx);
477                 pool->free_list = trunk->next;
478                 if (trunk->next != TRUNK_INVALID)
479                         pool->trunks[trunk->next]->prev = TRUNK_INVALID;
480                 trunk->prev = TRUNK_INVALID;
481                 trunk->next = TRUNK_INVALID;
482 #ifdef POOL_DEBUG
483                 pool->trunk_empty++;
484                 pool->trunk_avail--;
485 #endif
486         }
487         *idx = iidx;
488         mlx5_ipool_unlock(pool);
489         return p;
490 }
491
492 void *
493 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
494 {
495         void *entry = mlx5_ipool_malloc(pool, idx);
496
497         if (entry && pool->cfg.size)
498                 memset(entry, 0, pool->cfg.size);
499         return entry;
500 }
501
502 void
503 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
504 {
505         struct mlx5_indexed_trunk *trunk;
506         uint32_t trunk_idx;
507         uint32_t entry_idx;
508
509         if (!idx)
510                 return;
511         idx -= 1;
512         mlx5_ipool_lock(pool);
513         trunk_idx = mlx5_trunk_idx_get(pool, idx);
514         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
515             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
516                 goto out;
517         trunk = pool->trunks[trunk_idx];
518         if (!trunk)
519                 goto out;
520         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
521         if (trunk_idx != trunk->idx ||
522             rte_bitmap_get(trunk->bmp, entry_idx))
523                 goto out;
524         rte_bitmap_set(trunk->bmp, entry_idx);
525         trunk->free++;
526         if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
527            (pool, trunk->idx)) {
528                 if (pool->free_list == trunk->idx)
529                         pool->free_list = trunk->next;
530                 if (trunk->next != TRUNK_INVALID)
531                         pool->trunks[trunk->next]->prev = trunk->prev;
532                 if (trunk->prev != TRUNK_INVALID)
533                         pool->trunks[trunk->prev]->next = trunk->next;
534                 pool->cfg.free(trunk);
535                 pool->trunks[trunk_idx] = NULL;
536                 pool->n_trunk_valid--;
537 #ifdef POOL_DEBUG
538                 pool->trunk_avail--;
539                 pool->trunk_free++;
540 #endif
541                 if (pool->n_trunk_valid == 0) {
542                         pool->cfg.free(pool->trunks);
543                         pool->trunks = NULL;
544                         pool->n_trunk = 0;
545                 }
546         } else if (trunk->free == 1) {
547                 /* Put the trunk at the head of the free trunk list. */
548                 MLX5_ASSERT(pool->free_list != trunk->idx);
549                 trunk->next = pool->free_list;
550                 trunk->prev = TRUNK_INVALID;
551                 if (pool->free_list != TRUNK_INVALID)
552                         pool->trunks[pool->free_list]->prev = trunk->idx;
553                 pool->free_list = trunk->idx;
554 #ifdef POOL_DEBUG
555                 pool->trunk_empty--;
556                 pool->trunk_avail++;
557 #endif
558         }
559 #ifdef POOL_DEBUG
560         pool->n_entry--;
561 #endif
562 out:
563         mlx5_ipool_unlock(pool);
564 }
565
566 void *
567 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
568 {
569         struct mlx5_indexed_trunk *trunk;
570         void *p = NULL;
571         uint32_t trunk_idx;
572         uint32_t entry_idx;
573
574         if (!idx)
575                 return NULL;
576         idx -= 1;
577         mlx5_ipool_lock(pool);
578         trunk_idx = mlx5_trunk_idx_get(pool, idx);
579         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
580             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
581                 goto out;
582         trunk = pool->trunks[trunk_idx];
583         if (!trunk)
584                 goto out;
585         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
586         if (trunk_idx != trunk->idx ||
587             rte_bitmap_get(trunk->bmp, entry_idx))
588                 goto out;
589         p = &trunk->data[entry_idx * pool->cfg.size];
590 out:
591         mlx5_ipool_unlock(pool);
592         return p;
593 }
594
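/*
 * Usage sketch, not part of the original file: allocating an object,
 * fetching it again by index and releasing it. Returned indexes start at 1;
 * 0 is never handed out, so a zero index can serve as a "no entry" mark.
 * struct my_obj matches the hypothetical configuration sketched above.
 *
 *	uint32_t idx;
 *	struct my_obj *obj = mlx5_ipool_zmalloc(pool, &idx);
 *
 *	if (obj == NULL)
 *		return -ENOMEM;
 *	(store only the 32-bit idx; later:)
 *	obj = mlx5_ipool_get(pool, idx);
 *	mlx5_ipool_free(pool, idx);
 */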
595 int
596 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
597 {
598         struct mlx5_indexed_trunk **trunks;
599         uint32_t i;
600
601         MLX5_ASSERT(pool);
602         mlx5_ipool_lock(pool);
603         trunks = pool->trunks;
604         for (i = 0; i < pool->n_trunk; i++) {
605                 if (trunks[i])
606                         pool->cfg.free(trunks[i]);
607         }
608         if (pool->trunks)
609                 pool->cfg.free(pool->trunks);
610         mlx5_ipool_unlock(pool);
611         mlx5_free(pool);
612         return 0;
613 }
614
615 void
616 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
617 {
618         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
619                "total: %d\n",
620                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
621                pool->cfg.trunk_size, pool->n_trunk_valid);
622 #ifdef POOL_DEBUG
623         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
624                "available %u free %u\n",
625                pool->cfg.type, pool->n_entry, pool->trunk_new,
626                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
627 #endif
628 }
629
630 struct mlx5_l3t_tbl *
631 mlx5_l3t_create(enum mlx5_l3t_type type)
632 {
633         struct mlx5_l3t_tbl *tbl;
634         struct mlx5_indexed_pool_config l3t_ip_cfg = {
635                 .trunk_size = 16,
636                 .grow_trunk = 6,
637                 .grow_shift = 1,
638                 .need_lock = 0,
639                 .release_mem_en = 1,
640                 .malloc = mlx5_malloc,
641                 .free = mlx5_free,
642         };
643
644         if (type >= MLX5_L3T_TYPE_MAX) {
645                 rte_errno = EINVAL;
646                 return NULL;
647         }
648         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
649                           SOCKET_ID_ANY);
650         if (!tbl) {
651                 rte_errno = ENOMEM;
652                 return NULL;
653         }
654         tbl->type = type;
655         switch (type) {
656         case MLX5_L3T_TYPE_WORD:
657                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
658                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
659                 break;
660         case MLX5_L3T_TYPE_DWORD:
661                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
662                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
663                 break;
664         case MLX5_L3T_TYPE_QWORD:
665                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
666                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
667                 break;
668         default:
669                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
670                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
671                 break;
672         }
673         rte_spinlock_init(&tbl->sl);
674         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
675         if (!tbl->eip) {
676                 rte_errno = ENOMEM;
677                 mlx5_free(tbl);
678                 tbl = NULL;
679         }
680         return tbl;
681 }
682
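/*
 * Usage sketch, not part of the original file, assuming the pointer type
 * enumerator MLX5_L3T_TYPE_PTR declared in mlx5_utils.h: the 32-bit index
 * is split by the MLX5_L3T_GT/MT/ET masks into global, middle and entry
 * table positions, and the levels are allocated on demand and freed again
 * when their reference counts drop to zero.
 *
 *	union mlx5_l3t_data data = { .ptr = my_object };     (hypothetical)
 *	struct mlx5_l3t_tbl *t = mlx5_l3t_create(MLX5_L3T_TYPE_PTR);
 *
 *	if (t == NULL)
 *		return -rte_errno;
 *	if (mlx5_l3t_set_entry(t, index, &data))
 *		return -rte_errno;
 *	(later, drop the reference:)
 *	mlx5_l3t_clear_entry(t, index);
 *	mlx5_l3t_destroy(t);
 */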
683 void
684 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
685 {
686         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
687         uint32_t i, j;
688
689         if (!tbl)
690                 return;
691         g_tbl = tbl->tbl;
692         if (g_tbl) {
693                 for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
694                         m_tbl = g_tbl->tbl[i];
695                         if (!m_tbl)
696                                 continue;
697                         for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
698                                 if (!m_tbl->tbl[j])
699                                         continue;
700                                 MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
701                                             m_tbl->tbl[j])->ref_cnt);
702                                 mlx5_ipool_free(tbl->eip,
703                                                 ((struct mlx5_l3t_entry_word *)
704                                                 m_tbl->tbl[j])->idx);
705                                 m_tbl->tbl[j] = 0;
706                                 if (!(--m_tbl->ref_cnt))
707                                         break;
708                         }
709                         MLX5_ASSERT(!m_tbl->ref_cnt);
710                         mlx5_free(g_tbl->tbl[i]);
711                         g_tbl->tbl[i] = 0;
712                         if (!(--g_tbl->ref_cnt))
713                                 break;
714                 }
715                 MLX5_ASSERT(!g_tbl->ref_cnt);
716                 mlx5_free(tbl->tbl);
717                 tbl->tbl = 0;
718         }
719         mlx5_ipool_destroy(tbl->eip);
720         mlx5_free(tbl);
721 }
722
723 static int32_t
724 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
725                 union mlx5_l3t_data *data)
726 {
727         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
728         struct mlx5_l3t_entry_word *w_e_tbl;
729         struct mlx5_l3t_entry_dword *dw_e_tbl;
730         struct mlx5_l3t_entry_qword *qw_e_tbl;
731         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
732         void *e_tbl;
733         uint32_t entry_idx;
734
735         g_tbl = tbl->tbl;
736         if (!g_tbl)
737                 return -1;
738         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
739         if (!m_tbl)
740                 return -1;
741         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
742         if (!e_tbl)
743                 return -1;
744         entry_idx = idx & MLX5_L3T_ET_MASK;
745         switch (tbl->type) {
746         case MLX5_L3T_TYPE_WORD:
747                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
748                 data->word = w_e_tbl->entry[entry_idx].data;
749                 if (w_e_tbl->entry[entry_idx].data)
750                         w_e_tbl->entry[entry_idx].ref_cnt++;
751                 break;
752         case MLX5_L3T_TYPE_DWORD:
753                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
754                 data->dword = dw_e_tbl->entry[entry_idx].data;
755                 if (dw_e_tbl->entry[entry_idx].data)
756                         dw_e_tbl->entry[entry_idx].ref_cnt++;
757                 break;
758         case MLX5_L3T_TYPE_QWORD:
759                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
760                 data->qword = qw_e_tbl->entry[entry_idx].data;
761                 if (qw_e_tbl->entry[entry_idx].data)
762                         qw_e_tbl->entry[entry_idx].ref_cnt++;
763                 break;
764         default:
765                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
766                 data->ptr = ptr_e_tbl->entry[entry_idx].data;
767                 if (ptr_e_tbl->entry[entry_idx].data)
768                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
769                 break;
770         }
771         return 0;
772 }
773
774 int32_t
775 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
776                    union mlx5_l3t_data *data)
777 {
778         int ret;
779
780         rte_spinlock_lock(&tbl->sl);
781         ret = __l3t_get_entry(tbl, idx, data);
782         rte_spinlock_unlock(&tbl->sl);
783         return ret;
784 }
785
786 int32_t
787 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
788 {
789         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
790         struct mlx5_l3t_entry_word *w_e_tbl;
791         struct mlx5_l3t_entry_dword *dw_e_tbl;
792         struct mlx5_l3t_entry_qword *qw_e_tbl;
793         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
794         void *e_tbl;
795         uint32_t entry_idx;
796         uint64_t ref_cnt;
797         int32_t ret = -1;
798
799         rte_spinlock_lock(&tbl->sl);
800         g_tbl = tbl->tbl;
801         if (!g_tbl)
802                 goto out;
803         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
804         if (!m_tbl)
805                 goto out;
806         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
807         if (!e_tbl)
808                 goto out;
809         entry_idx = idx & MLX5_L3T_ET_MASK;
810         switch (tbl->type) {
811         case MLX5_L3T_TYPE_WORD:
812                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
813                 MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
814                 ret = --w_e_tbl->entry[entry_idx].ref_cnt;
815                 if (ret)
816                         goto out;
817                 w_e_tbl->entry[entry_idx].data = 0;
818                 ref_cnt = --w_e_tbl->ref_cnt;
819                 break;
820         case MLX5_L3T_TYPE_DWORD:
821                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
822                 MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
823                 ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
824                 if (ret)
825                         goto out;
826                 dw_e_tbl->entry[entry_idx].data = 0;
827                 ref_cnt = --dw_e_tbl->ref_cnt;
828                 break;
829         case MLX5_L3T_TYPE_QWORD:
830                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
831                 MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
832                 ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
833                 if (ret)
834                         goto out;
835                 qw_e_tbl->entry[entry_idx].data = 0;
836                 ref_cnt = --qw_e_tbl->ref_cnt;
837                 break;
838         default:
839                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
840                 MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
841                 ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
842                 if (ret)
843                         goto out;
844                 ptr_e_tbl->entry[entry_idx].data = NULL;
845                 ref_cnt = --ptr_e_tbl->ref_cnt;
846                 break;
847         }
848         if (!ref_cnt) {
849                 mlx5_ipool_free(tbl->eip,
850                                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
851                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
852                                                                         NULL;
853                 if (!(--m_tbl->ref_cnt)) {
854                         mlx5_free(m_tbl);
855                         g_tbl->tbl
856                         [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
857                         if (!(--g_tbl->ref_cnt)) {
858                                 mlx5_free(g_tbl);
859                                 tbl->tbl = 0;
860                         }
861                 }
862         }
863 out:
864         rte_spinlock_unlock(&tbl->sl);
865         return ret;
866 }
867
868 static int32_t
869 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
870                 union mlx5_l3t_data *data)
871 {
872         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
873         struct mlx5_l3t_entry_word *w_e_tbl;
874         struct mlx5_l3t_entry_dword *dw_e_tbl;
875         struct mlx5_l3t_entry_qword *qw_e_tbl;
876         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
877         void *e_tbl;
878         uint32_t entry_idx, tbl_idx = 0;
879
880         /* Check the global table, create it if empty. */
881         g_tbl = tbl->tbl;
882         if (!g_tbl) {
883                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
884                                     sizeof(struct mlx5_l3t_level_tbl) +
885                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
886                                     SOCKET_ID_ANY);
887                 if (!g_tbl) {
888                         rte_errno = ENOMEM;
889                         return -1;
890                 }
891                 tbl->tbl = g_tbl;
892         }
893         /*
894          * Check the middle table, create it if empty. The global table's
895          * ref_cnt is increased if a new middle table is created.
896          */
897         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
898         if (!m_tbl) {
899                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
900                                     sizeof(struct mlx5_l3t_level_tbl) +
901                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
902                                     SOCKET_ID_ANY);
903                 if (!m_tbl) {
904                         rte_errno = ENOMEM;
905                         return -1;
906                 }
907                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
908                                                                         m_tbl;
909                 g_tbl->ref_cnt++;
910         }
911         /*
912          * Check the entry table, create it if empty. The middle table's
913          * ref_cnt is increased if a new entry table is created.
914          */
915         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
916         if (!e_tbl) {
917                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
918                 if (!e_tbl) {
919                         rte_errno = ENOMEM;
920                         return -1;
921                 }
922                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
923                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
924                                                                         e_tbl;
925                 m_tbl->ref_cnt++;
926         }
927         entry_idx = idx & MLX5_L3T_ET_MASK;
928         switch (tbl->type) {
929         case MLX5_L3T_TYPE_WORD:
930                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
931                 if (w_e_tbl->entry[entry_idx].data) {
932                         data->word = w_e_tbl->entry[entry_idx].data;
933                         w_e_tbl->entry[entry_idx].ref_cnt++;
934                         rte_errno = EEXIST;
935                         return -1;
936                 }
937                 w_e_tbl->entry[entry_idx].data = data->word;
938                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
939                 w_e_tbl->ref_cnt++;
940                 break;
941         case MLX5_L3T_TYPE_DWORD:
942                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
943                 if (dw_e_tbl->entry[entry_idx].data) {
944                         data->dword = dw_e_tbl->entry[entry_idx].data;
945                         dw_e_tbl->entry[entry_idx].ref_cnt++;
946                         rte_errno = EEXIST;
947                         return -1;
948                 }
949                 dw_e_tbl->entry[entry_idx].data = data->dword;
950                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
951                 dw_e_tbl->ref_cnt++;
952                 break;
953         case MLX5_L3T_TYPE_QWORD:
954                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
955                 if (qw_e_tbl->entry[entry_idx].data) {
956                         data->qword = qw_e_tbl->entry[entry_idx].data;
957                         qw_e_tbl->entry[entry_idx].ref_cnt++;
958                         rte_errno = EEXIST;
959                         return -1;
960                 }
961                 qw_e_tbl->entry[entry_idx].data = data->qword;
962                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
963                 qw_e_tbl->ref_cnt++;
964                 break;
965         default:
966                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
967                 if (ptr_e_tbl->entry[entry_idx].data) {
968                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
969                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
970                         rte_errno = EEXIST;
971                         return -1;
972                 }
973                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
974                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
975                 ptr_e_tbl->ref_cnt++;
976                 break;
977         }
978         return 0;
979 }
980
981 int32_t
982 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
983                    union mlx5_l3t_data *data)
984 {
985         int ret;
986
987         rte_spinlock_lock(&tbl->sl);
988         ret = __l3t_set_entry(tbl, idx, data);
989         rte_spinlock_unlock(&tbl->sl);
990         return ret;
991 }
992
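/*
 * Usage sketch, not part of the original file: mlx5_l3t_prepare_entry()
 * below combines the lookup and the creation of missing data under the
 * table spinlock. The callback and its context are hypothetical; the
 * callback must fill *data and return 0 on success, matching
 * mlx5_l3t_alloc_callback_fn.
 *
 *	static int32_t
 *	my_alloc_cb(void *ctx __rte_unused, union mlx5_l3t_data *data)
 *	{
 *		data->ptr = mlx5_malloc(MLX5_MEM_ZERO, 64, 0, SOCKET_ID_ANY);
 *		return data->ptr ? 0 : -ENOMEM;
 *	}
 *
 *	union mlx5_l3t_data data;
 *
 *	if (mlx5_l3t_prepare_entry(tbl, idx, &data, my_alloc_cb, NULL) != 0)
 *		return -1;
 */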
993 int32_t
994 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
995                        union mlx5_l3t_data *data,
996                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
997 {
998         int32_t ret;
999
1000         rte_spinlock_lock(&tbl->sl);
1001         /* Check if entry data is ready. */
1002         ret = __l3t_get_entry(tbl, idx, data);
1003         if (!ret) {
1004                 switch (tbl->type) {
1005                 case MLX5_L3T_TYPE_WORD:
1006                         if (data->word)
1007                                 goto out;
1008                         break;
1009                 case MLX5_L3T_TYPE_DWORD:
1010                         if (data->dword)
1011                                 goto out;
1012                         break;
1013                 case MLX5_L3T_TYPE_QWORD:
1014                         if (data->qword)
1015                                 goto out;
1016                         break;
1017                 default:
1018                         if (data->ptr)
1019                                 goto out;
1020                         break;
1021                 }
1022         }
1023         /* Entry data is not ready, use user callback to create it. */
1024         ret = cb(ctx, data);
1025         if (ret)
1026                 goto out;
1027         /* Save the newly allocated data to the entry. */
1028         ret = __l3t_set_entry(tbl, idx, data);
1029 out:
1030         rte_spinlock_unlock(&tbl->sl);
1031         return ret;
1032 }