net/mlx5: use indexed pool as id generator
drivers/net/mlx5/mlx5_utils.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <rte_malloc.h>
#include <rte_hash_crc.h>

#include <mlx5_malloc.h>

#include "mlx5_utils.h"

struct mlx5_hlist *
mlx5_hlist_create(const char *name, uint32_t size)
{
        struct mlx5_hlist *h;
        uint32_t act_size;
        uint32_t alloc_size;

        if (!size)
                return NULL;
        /* Align to the next power of 2, a 32-bit integer is enough for now. */
        if (!rte_is_power_of_2(size)) {
                act_size = rte_align32pow2(size);
                DRV_LOG(WARNING, "Size 0x%" PRIX32 " is not power of 2, will "
                        "be aligned to 0x%" PRIX32 ".", size, act_size);
        } else {
                act_size = size;
        }
        alloc_size = sizeof(struct mlx5_hlist) +
                     sizeof(struct mlx5_hlist_head) * act_size;
        /* Using zmalloc, so there is no need to initialize the heads. */
        h = mlx5_malloc(MLX5_MEM_ZERO, alloc_size, RTE_CACHE_LINE_SIZE,
                        SOCKET_ID_ANY);
        if (!h) {
                DRV_LOG(ERR, "No memory for hash list %s creation",
                        name ? name : "None");
                return NULL;
        }
        if (name)
                snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
        h->table_sz = act_size;
        h->mask = act_size - 1;
        DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX32 " is created.",
                h->name, act_size);
        return h;
}
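
/*
 * A minimal usage sketch (illustrative only, not part of the driver): the
 * caller embeds struct mlx5_hlist_entry inside its own structure, placed
 * at the beginning as suggested, fills the 64-bit key and inserts the
 * entry. The names "struct my_data" and "my_data_register" below are
 * hypothetical.
 *
 *	struct my_data {
 *		struct mlx5_hlist_entry entry;	(placed first, as suggested)
 *		uint32_t value;
 *	};
 *
 *	int my_data_register(struct mlx5_hlist *h, uint64_t key, uint32_t v)
 *	{
 *		int ret;
 *		struct my_data *d = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*d),
 *						RTE_CACHE_LINE_SIZE,
 *						SOCKET_ID_ANY);
 *
 *		if (!d)
 *			return -ENOMEM;
 *		d->entry.key = key;
 *		d->value = v;
 *		ret = mlx5_hlist_insert(h, &d->entry);
 *		if (ret)
 *			mlx5_free(d);
 *		return ret;
 *	}
 */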

struct mlx5_hlist_entry *
mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key)
{
        uint32_t idx;
        struct mlx5_hlist_head *first;
        struct mlx5_hlist_entry *node;

        MLX5_ASSERT(h);
        idx = rte_hash_crc_8byte(key, 0) & h->mask;
        first = &h->heads[idx];
        LIST_FOREACH(node, first, next) {
                if (node->key == key)
                        return node;
        }
        return NULL;
}

int
mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
{
        uint32_t idx;
        struct mlx5_hlist_head *first;
        struct mlx5_hlist_entry *node;

        MLX5_ASSERT(h && entry);
        idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
        first = &h->heads[idx];
        /* No need to reuse the lookup function. */
        LIST_FOREACH(node, first, next) {
                if (node->key == entry->key)
                        return -EEXIST;
        }
        LIST_INSERT_HEAD(first, entry, next);
        return 0;
}

struct mlx5_hlist_entry *
mlx5_hlist_lookup_ex(struct mlx5_hlist *h, uint64_t key,
                     mlx5_hlist_match_callback_fn cb, void *ctx)
{
        uint32_t idx;
        struct mlx5_hlist_head *first;
        struct mlx5_hlist_entry *node;

        MLX5_ASSERT(h && cb && ctx);
        idx = rte_hash_crc_8byte(key, 0) & h->mask;
        first = &h->heads[idx];
        LIST_FOREACH(node, first, next) {
                if (!cb(node, ctx))
                        return node;
        }
        return NULL;
}

int
mlx5_hlist_insert_ex(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry,
                     mlx5_hlist_match_callback_fn cb, void *ctx)
{
        uint32_t idx;
        struct mlx5_hlist_head *first;
        struct mlx5_hlist_entry *node;

        MLX5_ASSERT(h && entry && cb && ctx);
        idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
        first = &h->heads[idx];
        /* No need to reuse the lookup function. */
        LIST_FOREACH(node, first, next) {
                if (!cb(node, ctx))
                        return -EEXIST;
        }
        LIST_INSERT_HEAD(first, entry, next);
        return 0;
}
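
/*
 * Note on the _ex variants above: the match callback follows memcmp()
 * semantics and must return 0 when the node matches the lookup context.
 * A sketch of such a callback (the "struct my_ctx" name is hypothetical):
 *
 *	struct my_ctx {
 *		uint64_t key;
 *	};
 *
 *	static int
 *	my_match_cb(struct mlx5_hlist_entry *entry, void *ctx)
 *	{
 *		struct my_ctx *c = ctx;
 *
 *		return entry->key != c->key;
 *	}
 */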

void
mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
                  struct mlx5_hlist_entry *entry)
{
        MLX5_ASSERT(entry && entry->next.le_prev);
        LIST_REMOVE(entry, next);
        /* Set to NULL to prevent the entry from being removed twice. */
        entry->next.le_prev = NULL;
}

void
mlx5_hlist_destroy(struct mlx5_hlist *h,
                   mlx5_hlist_destroy_callback_fn cb, void *ctx)
{
        uint32_t idx;
        struct mlx5_hlist_entry *entry;

        MLX5_ASSERT(h);
        for (idx = 0; idx < h->table_sz; ++idx) {
                /* There is no LIST_FOREACH_SAFE, use a while loop instead. */
                while (!LIST_EMPTY(&h->heads[idx])) {
                        entry = LIST_FIRST(&h->heads[idx]);
                        LIST_REMOVE(entry, next);
                        /*
                         * The user owns the whole element that contains the
                         * data entry, so cleanup and freeing are the user's
                         * duty: the hlist entry may not be placed at the
                         * beginning of the element (placing it first is only
                         * a suggestion). If no callback is given, the default
                         * free function is used.
                         */
                        if (cb)
                                cb(entry, ctx);
                        else
                                mlx5_free(entry);
                }
        }
        mlx5_free(h);
}

static inline void
mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
{
        if (pool->cfg.need_lock)
                rte_spinlock_lock(&pool->lock);
}

static inline void
mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
{
        if (pool->cfg.need_lock)
                rte_spinlock_unlock(&pool->lock);
}

static inline uint32_t
mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;
        uint32_t trunk_idx = 0;
        uint32_t i;

        if (!cfg->grow_trunk)
                return entry_idx / cfg->trunk_size;
        if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
                trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
                            (cfg->trunk_size << (cfg->grow_shift *
                            cfg->grow_trunk)) + cfg->grow_trunk;
        } else {
                for (i = 0; i < cfg->grow_trunk; i++) {
                        if (entry_idx < pool->grow_tbl[i])
                                break;
                }
                trunk_idx = i;
        }
        return trunk_idx;
}

static inline uint32_t
mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;

        return cfg->trunk_size << (cfg->grow_shift *
               (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
}

static inline uint32_t
mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
        struct mlx5_indexed_pool_config *cfg = &pool->cfg;
        uint32_t offset = 0;

        if (!trunk_idx)
                return 0;
        if (!cfg->grow_trunk)
                return cfg->trunk_size * trunk_idx;
        if (trunk_idx < cfg->grow_trunk)
                offset = pool->grow_tbl[trunk_idx - 1];
        else
                offset = pool->grow_tbl[cfg->grow_trunk - 1] +
                         (cfg->trunk_size << (cfg->grow_shift *
                         cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
        return offset;
}
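
/*
 * A worked example of the three helpers above, using the configuration
 * set up by mlx5_l3t_create() below (trunk_size 16, grow_shift 1,
 * grow_trunk 6): grow_tbl becomes {16, 48, 112, 240, 496, 1008}, so
 * trunks 0..5 hold 16, 32, 64, 128, 256 and 512 entries, and every trunk
 * from index 6 on holds 16 << 6 = 1024 entries. For entry index 1100:
 * trunk_idx = 6 + (1100 - 1008) / 1024 = 6, the trunk size is 1024 and
 * the trunk offset is 1008, so the entry is slot 1100 - 1008 = 92 of
 * trunk 6.
 */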

struct mlx5_indexed_pool *
mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
{
        struct mlx5_indexed_pool *pool;
        uint32_t i;

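        /*
         * Sanity checks: the malloc and free callbacks must be provided
         * together, and a non-zero trunk size must be a power of two small
         * enough that the in-trunk entry bits plus the TRUNK_IDX_BITS trunk
         * bits still fit into a 32-bit index.
         */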
        if (!cfg || (!cfg->malloc ^ !cfg->free) ||
            (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
            ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
                return NULL;
        pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
                           sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
                           SOCKET_ID_ANY);
        if (!pool)
                return NULL;
        pool->cfg = *cfg;
        if (!pool->cfg.trunk_size)
                pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
        if (!cfg->malloc && !cfg->free) {
                pool->cfg.malloc = mlx5_malloc;
                pool->cfg.free = mlx5_free;
        }
        pool->free_list = TRUNK_INVALID;
        if (pool->cfg.need_lock)
                rte_spinlock_init(&pool->lock);
        /*
         * Initialize the dynamic grow trunk size lookup table so that the
         * trunk entry index offset can be found quickly.
         */
        for (i = 0; i < cfg->grow_trunk; i++) {
                pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
                if (i > 0)
                        pool->grow_tbl[i] += pool->grow_tbl[i - 1];
        }
        return pool;
}

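/*
 * Grow the pool by one trunk. Each trunk is a single allocation laid out
 * as the struct mlx5_indexed_trunk header, then the entry data area
 * (trunk entry count * cfg->size bytes, rounded up to a cache line), then
 * the rte_bitmap tracking free entries. With release_mem_en enabled the
 * new trunk takes the first free slot in the trunk table; otherwise
 * trunks are only ever appended.
 */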
static int
mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
{
        struct mlx5_indexed_trunk *trunk;
        struct mlx5_indexed_trunk **trunk_tmp;
        struct mlx5_indexed_trunk **p;
        size_t trunk_size = 0;
        size_t data_size;
        size_t bmp_size;
        uint32_t idx;

        if (pool->n_trunk_valid == TRUNK_MAX_IDX)
                return -ENOMEM;
        if (pool->n_trunk_valid == pool->n_trunk) {
                /* No free trunk slots left, expand the trunk list. */
                int n_grow = pool->n_trunk_valid ? pool->n_trunk :
                             RTE_CACHE_LINE_SIZE / sizeof(void *);

                p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
                                     sizeof(struct mlx5_indexed_trunk *),
                                     RTE_CACHE_LINE_SIZE, rte_socket_id());
                if (!p)
                        return -ENOMEM;
                if (pool->trunks)
                        memcpy(p, pool->trunks, pool->n_trunk_valid *
                               sizeof(struct mlx5_indexed_trunk *));
                memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
                       n_grow * sizeof(void *));
                trunk_tmp = pool->trunks;
                pool->trunks = p;
                if (trunk_tmp)
                        pool->cfg.free(trunk_tmp);
                pool->n_trunk += n_grow;
        }
        if (!pool->cfg.release_mem_en) {
                idx = pool->n_trunk_valid;
        } else {
                /* Find the first available slot in the trunk list. */
                for (idx = 0; idx < pool->n_trunk; idx++)
                        if (pool->trunks[idx] == NULL)
                                break;
        }
        trunk_size += sizeof(*trunk);
        /* data_size is the number of entries in the new trunk. */
        data_size = mlx5_trunk_size_get(pool, idx);
        bmp_size = rte_bitmap_get_memory_footprint(data_size);
        /* rte_bitmap requires cacheline-aligned memory. */
        trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
        trunk_size += bmp_size;
        trunk = pool->cfg.malloc(0, trunk_size,
                                 RTE_CACHE_LINE_SIZE, rte_socket_id());
        if (!trunk)
                return -ENOMEM;
        pool->trunks[idx] = trunk;
        trunk->idx = idx;
        trunk->free = data_size;
        trunk->prev = TRUNK_INVALID;
        trunk->next = TRUNK_INVALID;
        MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
        pool->free_list = idx;
        /* Mark all entries as available. */
        trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
                     [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
                     bmp_size);
        MLX5_ASSERT(trunk->bmp);
        pool->n_trunk_valid++;
#ifdef POOL_DEBUG
        pool->trunk_new++;
        pool->trunk_avail++;
#endif
        return 0;
}

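/*
 * Allocate one entry from the pool. The index returned through *idx is
 * the global entry index plus one, so that 0 can serve as the invalid
 * index; mlx5_ipool_get() and mlx5_ipool_free() undo this bias.
 */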
void *
mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
        struct mlx5_indexed_trunk *trunk;
        uint64_t slab = 0;
        uint32_t iidx = 0;
        void *p;

        mlx5_ipool_lock(pool);
        if (pool->free_list == TRUNK_INVALID) {
                /* If there is no available trunk, grow a new one. */
                if (mlx5_ipool_grow(pool)) {
                        mlx5_ipool_unlock(pool);
                        return NULL;
                }
        }
        MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
        trunk = pool->trunks[pool->free_list];
        MLX5_ASSERT(trunk->free);
        if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
                mlx5_ipool_unlock(pool);
                return NULL;
        }
        MLX5_ASSERT(slab);
        iidx += __builtin_ctzll(slab);
        MLX5_ASSERT(iidx != UINT32_MAX);
        MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
        rte_bitmap_clear(trunk->bmp, iidx);
        p = &trunk->data[iidx * pool->cfg.size];
        /*
         * The ipool index should grow continuously from small to big:
         * some features, such as metering, accept only a limited number
         * of index bits, so a random index with the MSB set may be
         * rejected.
         */
        iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
        iidx += 1; /* Return a non-zero index. */
        trunk->free--;
#ifdef POOL_DEBUG
        pool->n_entry++;
#endif
        if (!trunk->free) {
                /* A full trunk is removed from the free list. */
                MLX5_ASSERT(pool->free_list == trunk->idx);
                pool->free_list = trunk->next;
                if (trunk->next != TRUNK_INVALID)
                        pool->trunks[trunk->next]->prev = TRUNK_INVALID;
                trunk->prev = TRUNK_INVALID;
                trunk->next = TRUNK_INVALID;
#ifdef POOL_DEBUG
                pool->trunk_empty++;
                pool->trunk_avail--;
#endif
        }
        *idx = iidx;
        mlx5_ipool_unlock(pool);
        return p;
}

void *
mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
        void *entry = mlx5_ipool_malloc(pool, idx);

        if (entry && pool->cfg.size)
                memset(entry, 0, pool->cfg.size);
        return entry;
}

void
mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        struct mlx5_indexed_trunk *trunk;
        uint32_t trunk_idx;
        uint32_t entry_idx;

        if (!idx)
                return;
        idx -= 1;
        mlx5_ipool_lock(pool);
        trunk_idx = mlx5_trunk_idx_get(pool, idx);
        if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
            (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
                goto out;
        trunk = pool->trunks[trunk_idx];
        if (!trunk)
                goto out;
        entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
        if (trunk_idx != trunk->idx ||
            rte_bitmap_get(trunk->bmp, entry_idx))
                goto out;
        rte_bitmap_set(trunk->bmp, entry_idx);
        trunk->free++;
        if (pool->cfg.release_mem_en &&
            trunk->free == mlx5_trunk_size_get(pool, trunk->idx)) {
                if (pool->free_list == trunk->idx)
                        pool->free_list = trunk->next;
                if (trunk->next != TRUNK_INVALID)
                        pool->trunks[trunk->next]->prev = trunk->prev;
                if (trunk->prev != TRUNK_INVALID)
                        pool->trunks[trunk->prev]->next = trunk->next;
                pool->cfg.free(trunk);
                pool->trunks[trunk_idx] = NULL;
                pool->n_trunk_valid--;
#ifdef POOL_DEBUG
                pool->trunk_avail--;
                pool->trunk_free++;
#endif
                if (pool->n_trunk_valid == 0) {
                        pool->cfg.free(pool->trunks);
                        pool->trunks = NULL;
                        pool->n_trunk = 0;
                }
        } else if (trunk->free == 1) {
                /* Put into free trunk list head. */
                MLX5_ASSERT(pool->free_list != trunk->idx);
                trunk->next = pool->free_list;
                trunk->prev = TRUNK_INVALID;
                if (pool->free_list != TRUNK_INVALID)
                        pool->trunks[pool->free_list]->prev = trunk->idx;
                pool->free_list = trunk->idx;
#ifdef POOL_DEBUG
                pool->trunk_empty--;
                pool->trunk_avail++;
#endif
        }
#ifdef POOL_DEBUG
        pool->n_entry--;
#endif
out:
        mlx5_ipool_unlock(pool);
}

void *
mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
{
        struct mlx5_indexed_trunk *trunk;
        void *p = NULL;
        uint32_t trunk_idx;
        uint32_t entry_idx;

        if (!idx)
                return NULL;
        idx -= 1;
        mlx5_ipool_lock(pool);
        trunk_idx = mlx5_trunk_idx_get(pool, idx);
        if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
            (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
                goto out;
        trunk = pool->trunks[trunk_idx];
        if (!trunk)
                goto out;
        entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
        if (trunk_idx != trunk->idx ||
            rte_bitmap_get(trunk->bmp, entry_idx))
                goto out;
        p = &trunk->data[entry_idx * pool->cfg.size];
out:
        mlx5_ipool_unlock(pool);
        return p;
}

int
mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
{
        struct mlx5_indexed_trunk **trunks;
        uint32_t i;

        MLX5_ASSERT(pool);
        mlx5_ipool_lock(pool);
        trunks = pool->trunks;
        for (i = 0; i < pool->n_trunk; i++) {
                if (trunks[i])
                        pool->cfg.free(trunks[i]);
        }
        /* Free the trunk table itself if it was ever allocated. */
        if (pool->trunks)
                pool->cfg.free(pool->trunks);
        mlx5_ipool_unlock(pool);
        mlx5_free(pool);
        return 0;
}

void
mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
{
        printf("Pool %s entry size %u, trunks %u, %u entries per trunk, "
               "total: %u\n",
               pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
               pool->cfg.trunk_size, pool->n_trunk_valid);
#ifdef POOL_DEBUG
        printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
               "available %u free %u\n",
               pool->cfg.type, pool->n_entry, pool->trunk_new,
               pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
#endif
}

struct mlx5_l3t_tbl *
mlx5_l3t_create(enum mlx5_l3t_type type)
{
        struct mlx5_l3t_tbl *tbl;
        struct mlx5_indexed_pool_config l3t_ip_cfg = {
                .trunk_size = 16,
                .grow_trunk = 6,
                .grow_shift = 1,
                .need_lock = 0,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
        };

        if (type >= MLX5_L3T_TYPE_MAX) {
                rte_errno = EINVAL;
                return NULL;
        }
        tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
                          SOCKET_ID_ANY);
        if (!tbl) {
                rte_errno = ENOMEM;
                return NULL;
        }
        tbl->type = type;
        switch (type) {
        case MLX5_L3T_TYPE_WORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
                break;
        case MLX5_L3T_TYPE_DWORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
                break;
        case MLX5_L3T_TYPE_QWORD:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
                break;
        default:
                l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
                l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
                break;
        }
        rte_spinlock_init(&tbl->sl);
        tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
        if (!tbl->eip) {
                rte_errno = ENOMEM;
                mlx5_free(tbl);
                tbl = NULL;
        }
        return tbl;
}

void
mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        uint32_t i, j;

        if (!tbl)
                return;
        g_tbl = tbl->tbl;
        if (g_tbl) {
                for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
                        m_tbl = g_tbl->tbl[i];
                        if (!m_tbl)
                                continue;
                        for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
                                if (!m_tbl->tbl[j])
                                        continue;
                                MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
                                            m_tbl->tbl[j])->ref_cnt);
                                mlx5_ipool_free(tbl->eip,
                                                ((struct mlx5_l3t_entry_word *)
                                                m_tbl->tbl[j])->idx);
                                m_tbl->tbl[j] = 0;
                                if (!(--m_tbl->ref_cnt))
                                        break;
                        }
                        MLX5_ASSERT(!m_tbl->ref_cnt);
                        mlx5_free(g_tbl->tbl[i]);
                        g_tbl->tbl[i] = 0;
                        if (!(--g_tbl->ref_cnt))
                                break;
                }
                MLX5_ASSERT(!g_tbl->ref_cnt);
                mlx5_free(tbl->tbl);
                tbl->tbl = 0;
        }
        mlx5_ipool_destroy(tbl->eip);
        mlx5_free(tbl);
}

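/*
 * The 32-bit L3T index is split into three fields: the bits above
 * MLX5_L3T_GT_OFFSET select the global-table slot, the bits between
 * MLX5_L3T_MT_OFFSET and MLX5_L3T_GT_OFFSET select the middle-table
 * slot, and the low bits (masked by MLX5_L3T_ET_MASK) select the entry
 * inside the entry table allocated from the indexed pool.
 */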
static int32_t
__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                union mlx5_l3t_data *data)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx;

        g_tbl = tbl->tbl;
        if (!g_tbl)
                return -1;
        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        if (!m_tbl)
                return -1;
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        if (!e_tbl)
                return -1;
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                data->word = w_e_tbl->entry[entry_idx].data;
                if (w_e_tbl->entry[entry_idx].data)
                        w_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                data->dword = dw_e_tbl->entry[entry_idx].data;
                if (dw_e_tbl->entry[entry_idx].data)
                        dw_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                data->qword = qw_e_tbl->entry[entry_idx].data;
                if (qw_e_tbl->entry[entry_idx].data)
                        qw_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        default:
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                data->ptr = ptr_e_tbl->entry[entry_idx].data;
                if (ptr_e_tbl->entry[entry_idx].data)
                        ptr_e_tbl->entry[entry_idx].ref_cnt++;
                break;
        }
        return 0;
}

int32_t
mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                   union mlx5_l3t_data *data)
{
        int ret;

        rte_spinlock_lock(&tbl->sl);
        ret = __l3t_get_entry(tbl, idx, data);
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}

int32_t
mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx;
        uint64_t ref_cnt;
        int32_t ret = -1;

        rte_spinlock_lock(&tbl->sl);
        g_tbl = tbl->tbl;
        if (!g_tbl)
                goto out;
        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        if (!m_tbl)
                goto out;
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        if (!e_tbl)
                goto out;
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
                ret = --w_e_tbl->entry[entry_idx].ref_cnt;
                if (ret)
                        goto out;
                w_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --w_e_tbl->ref_cnt;
                break;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
                ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
                if (ret)
                        goto out;
                dw_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --dw_e_tbl->ref_cnt;
                break;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
                ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
                if (ret)
                        goto out;
                qw_e_tbl->entry[entry_idx].data = 0;
                ref_cnt = --qw_e_tbl->ref_cnt;
                break;
        default:
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
                ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
                if (ret)
                        goto out;
                ptr_e_tbl->entry[entry_idx].data = NULL;
                ref_cnt = --ptr_e_tbl->ref_cnt;
                break;
        }
        if (!ref_cnt) {
                mlx5_ipool_free(tbl->eip,
                                ((struct mlx5_l3t_entry_word *)e_tbl)->idx);
                m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
                                                                        NULL;
                if (!(--m_tbl->ref_cnt)) {
                        mlx5_free(m_tbl);
                        g_tbl->tbl
                        [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
                        if (!(--g_tbl->ref_cnt)) {
                                mlx5_free(g_tbl);
                                tbl->tbl = 0;
                        }
                }
        }
out:
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}

static int32_t
__l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                union mlx5_l3t_data *data)
{
        struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
        struct mlx5_l3t_entry_word *w_e_tbl;
        struct mlx5_l3t_entry_dword *dw_e_tbl;
        struct mlx5_l3t_entry_qword *qw_e_tbl;
        struct mlx5_l3t_entry_ptr *ptr_e_tbl;
        void *e_tbl;
        uint32_t entry_idx, tbl_idx = 0;

        /* Check the global table, create it if empty. */
        g_tbl = tbl->tbl;
        if (!g_tbl) {
                g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
                                    sizeof(struct mlx5_l3t_level_tbl) +
                                    sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
                                    SOCKET_ID_ANY);
                if (!g_tbl) {
                        rte_errno = ENOMEM;
                        return -1;
                }
                tbl->tbl = g_tbl;
        }
        /*
         * Check the middle table, create it if empty. The reference
         * counter is increased if a new sub-table is created.
         */
        m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
        if (!m_tbl) {
                m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
                                    sizeof(struct mlx5_l3t_level_tbl) +
                                    sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
                                    SOCKET_ID_ANY);
                if (!m_tbl) {
                        rte_errno = ENOMEM;
                        return -1;
                }
                g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
                                                                        m_tbl;
                g_tbl->ref_cnt++;
        }
        /*
         * Check the entry table, create it if empty. The reference
         * counter is increased if a new entry table is created.
         */
        e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
        if (!e_tbl) {
                e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
                if (!e_tbl) {
                        rte_errno = ENOMEM;
                        return -1;
                }
                ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
                m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
                                                                        e_tbl;
                m_tbl->ref_cnt++;
        }
        entry_idx = idx & MLX5_L3T_ET_MASK;
        switch (tbl->type) {
        case MLX5_L3T_TYPE_WORD:
                w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
                if (w_e_tbl->entry[entry_idx].data) {
                        data->word = w_e_tbl->entry[entry_idx].data;
                        w_e_tbl->entry[entry_idx].ref_cnt++;
                        rte_errno = EEXIST;
                        return -1;
                }
                w_e_tbl->entry[entry_idx].data = data->word;
                w_e_tbl->entry[entry_idx].ref_cnt = 1;
                w_e_tbl->ref_cnt++;
                break;
        case MLX5_L3T_TYPE_DWORD:
                dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
                if (dw_e_tbl->entry[entry_idx].data) {
                        data->dword = dw_e_tbl->entry[entry_idx].data;
                        dw_e_tbl->entry[entry_idx].ref_cnt++;
                        rte_errno = EEXIST;
                        return -1;
                }
                dw_e_tbl->entry[entry_idx].data = data->dword;
                dw_e_tbl->entry[entry_idx].ref_cnt = 1;
                dw_e_tbl->ref_cnt++;
                break;
        case MLX5_L3T_TYPE_QWORD:
                qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
                if (qw_e_tbl->entry[entry_idx].data) {
                        data->qword = qw_e_tbl->entry[entry_idx].data;
                        qw_e_tbl->entry[entry_idx].ref_cnt++;
                        rte_errno = EEXIST;
                        return -1;
                }
                qw_e_tbl->entry[entry_idx].data = data->qword;
                qw_e_tbl->entry[entry_idx].ref_cnt = 1;
                qw_e_tbl->ref_cnt++;
                break;
        default:
                ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
                if (ptr_e_tbl->entry[entry_idx].data) {
                        data->ptr = ptr_e_tbl->entry[entry_idx].data;
                        ptr_e_tbl->entry[entry_idx].ref_cnt++;
                        rte_errno = EEXIST;
                        return -1;
                }
                ptr_e_tbl->entry[entry_idx].data = data->ptr;
                ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
                ptr_e_tbl->ref_cnt++;
                break;
        }
        return 0;
}

int32_t
mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                   union mlx5_l3t_data *data)
{
        int ret;

        rte_spinlock_lock(&tbl->sl);
        ret = __l3t_set_entry(tbl, idx, data);
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}

int32_t
mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
                       union mlx5_l3t_data *data,
                       mlx5_l3t_alloc_callback_fn cb, void *ctx)
{
        int32_t ret;

        rte_spinlock_lock(&tbl->sl);
        /* Check if the entry data is ready. */
        ret = __l3t_get_entry(tbl, idx, data);
        if (!ret) {
                switch (tbl->type) {
                case MLX5_L3T_TYPE_WORD:
                        if (data->word)
                                goto out;
                        break;
                case MLX5_L3T_TYPE_DWORD:
                        if (data->dword)
                                goto out;
                        break;
                case MLX5_L3T_TYPE_QWORD:
                        if (data->qword)
                                goto out;
                        break;
                default:
                        if (data->ptr)
                                goto out;
                        break;
                }
        }
        /* The entry data is not ready, use the user callback to create it. */
        ret = cb(ctx, data);
        if (ret)
                goto out;
        /* Save the newly allocated data to the entry. */
        ret = __l3t_set_entry(tbl, idx, data);
out:
        rte_spinlock_unlock(&tbl->sl);
        return ret;
}
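
/*
 * A minimal sketch of mlx5_l3t_prepare_entry() usage (illustrative only;
 * the "my_alloc_cb", "alloc_some_id" and "use_id" names are hypothetical).
 * The callback is invoked under the table lock when no entry exists yet
 * and must fill *data, returning 0 on success:
 *
 *	static int32_t
 *	my_alloc_cb(void *ctx, union mlx5_l3t_data *data)
 *	{
 *		data->dword = alloc_some_id(ctx);
 *		return data->dword ? 0 : -1;
 *	}
 *
 *	union mlx5_l3t_data data;
 *
 *	if (!mlx5_l3t_prepare_entry(tbl, idx, &data, my_alloc_cb, ctx))
 *		use_id(data.dword);
 */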