net/mlx5: make three level table thread safe
[dpdk.git] / drivers / net / mlx5 / mlx5_utils.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4
5 #include <rte_malloc.h>
6 #include <rte_hash_crc.h>
7
8 #include <mlx5_malloc.h>
9
10 #include "mlx5_utils.h"
11
12 struct mlx5_hlist *
13 mlx5_hlist_create(const char *name, uint32_t size)
14 {
15         struct mlx5_hlist *h;
16         uint32_t act_size;
17         uint32_t alloc_size;
18
19         if (!size)
20                 return NULL;
21         /* Align to the next power of 2, 32bits integer is enough now. */
22         if (!rte_is_power_of_2(size)) {
23                 act_size = rte_align32pow2(size);
24                 DRV_LOG(WARNING, "Size 0x%" PRIX32 " is not power of 2, will "
25                         "be aligned to 0x%" PRIX32 ".", size, act_size);
26         } else {
27                 act_size = size;
28         }
29         alloc_size = sizeof(struct mlx5_hlist) +
30                      sizeof(struct mlx5_hlist_head) * act_size;
31         /* Using zmalloc, then no need to initialize the heads. */
32         h = mlx5_malloc(MLX5_MEM_ZERO, alloc_size, RTE_CACHE_LINE_SIZE,
33                         SOCKET_ID_ANY);
34         if (!h) {
35                 DRV_LOG(ERR, "No memory for hash list %s creation",
36                         name ? name : "None");
37                 return NULL;
38         }
39         if (name)
40                 snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
41         h->table_sz = act_size;
42         h->mask = act_size - 1;
43         DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX32 " is created.",
44                 h->name, act_size);
45         return h;
46 }
47
48 struct mlx5_hlist_entry *
49 mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key)
50 {
51         uint32_t idx;
52         struct mlx5_hlist_head *first;
53         struct mlx5_hlist_entry *node;
54
55         MLX5_ASSERT(h);
56         idx = rte_hash_crc_8byte(key, 0) & h->mask;
57         first = &h->heads[idx];
58         LIST_FOREACH(node, first, next) {
59                 if (node->key == key)
60                         return node;
61         }
62         return NULL;
63 }
64
65 int
66 mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
67 {
68         uint32_t idx;
69         struct mlx5_hlist_head *first;
70         struct mlx5_hlist_entry *node;
71
72         MLX5_ASSERT(h && entry);
73         idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
74         first = &h->heads[idx];
75         /* No need to reuse the lookup function. */
76         LIST_FOREACH(node, first, next) {
77                 if (node->key == entry->key)
78                         return -EEXIST;
79         }
80         LIST_INSERT_HEAD(first, entry, next);
81         return 0;
82 }
83
84 struct mlx5_hlist_entry *
85 mlx5_hlist_lookup_ex(struct mlx5_hlist *h, uint64_t key,
86                      mlx5_hlist_match_callback_fn cb, void *ctx)
87 {
88         uint32_t idx;
89         struct mlx5_hlist_head *first;
90         struct mlx5_hlist_entry *node;
91
92         MLX5_ASSERT(h && cb && ctx);
93         idx = rte_hash_crc_8byte(key, 0) & h->mask;
94         first = &h->heads[idx];
95         LIST_FOREACH(node, first, next) {
96                 if (!cb(node, ctx))
97                         return node;
98         }
99         return NULL;
100 }
101
102 int
103 mlx5_hlist_insert_ex(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry,
104                      mlx5_hlist_match_callback_fn cb, void *ctx)
105 {
106         uint32_t idx;
107         struct mlx5_hlist_head *first;
108         struct mlx5_hlist_entry *node;
109
110         MLX5_ASSERT(h && entry && cb && ctx);
111         idx = rte_hash_crc_8byte(entry->key, 0) & h->mask;
112         first = &h->heads[idx];
113         /* No need to reuse the lookup function. */
114         LIST_FOREACH(node, first, next) {
115                 if (!cb(node, ctx))
116                         return -EEXIST;
117         }
118         LIST_INSERT_HEAD(first, entry, next);
119         return 0;
120 }
121
/*
 * Detach an entry from its hash list.  The list head is not needed because
 * LIST_REMOVE() works from the entry's own linkage, hence __rte_unused.
 */
void
mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
		  struct mlx5_hlist_entry *entry)
{
	/* le_prev is non-NULL only while the entry is linked. */
	MLX5_ASSERT(entry && entry->next.le_prev);
	LIST_REMOVE(entry, next);
	/* Set to NULL to get rid of removing action for more than once. */
	entry->next.le_prev = NULL;
}
131
132 void
133 mlx5_hlist_destroy(struct mlx5_hlist *h,
134                    mlx5_hlist_destroy_callback_fn cb, void *ctx)
135 {
136         uint32_t idx;
137         struct mlx5_hlist_entry *entry;
138
139         MLX5_ASSERT(h);
140         for (idx = 0; idx < h->table_sz; ++idx) {
141                 /* no LIST_FOREACH_SAFE, using while instead */
142                 while (!LIST_EMPTY(&h->heads[idx])) {
143                         entry = LIST_FIRST(&h->heads[idx]);
144                         LIST_REMOVE(entry, next);
145                         /*
146                          * The owner of whole element which contains data entry
147                          * is the user, so it's the user's duty to do the clean
148                          * up and the free work because someone may not put the
149                          * hlist entry at the beginning(suggested to locate at
150                          * the beginning). Or else the default free function
151                          * will be used.
152                          */
153                         if (cb)
154                                 cb(entry, ctx);
155                         else
156                                 mlx5_free(entry);
157                 }
158         }
159         mlx5_free(h);
160 }
161
162 static inline void
163 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
164 {
165         if (pool->cfg.need_lock)
166                 rte_spinlock_lock(&pool->lock);
167 }
168
169 static inline void
170 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
171 {
172         if (pool->cfg.need_lock)
173                 rte_spinlock_unlock(&pool->lock);
174 }
175
/*
 * Map a pool-wide entry index to the index of the trunk containing it.
 *
 * Without dynamic growth all trunks hold cfg->trunk_size entries, so a
 * plain division suffices.  With growth, pool->grow_tbl holds the
 * cumulative entry counts of the first cfg->grow_trunk trunks (filled in
 * mlx5_ipool_create); beyond the last cumulative boundary all trunks have
 * the fixed size trunk_size << (grow_shift * grow_trunk).
 */
static inline uint32_t
mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
{
	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
	uint32_t trunk_idx = 0;
	uint32_t i;

	if (!cfg->grow_trunk)
		return entry_idx / cfg->trunk_size;
	if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
		/* Past the growing region: constant-size trunks. */
		trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
			    (cfg->trunk_size << (cfg->grow_shift *
			    cfg->grow_trunk)) + cfg->grow_trunk;
	} else {
		/* Inside the growing region: linear scan of grow_tbl. */
		for (i = 0; i < cfg->grow_trunk; i++) {
			if (entry_idx < pool->grow_tbl[i])
				break;
		}
		trunk_idx = i;
	}
	return trunk_idx;
}
198
/*
 * Number of entries held by the trunk at trunk_idx.  Trunk capacity doubles
 * by grow_shift for each of the first grow_trunk trunks; the shift factor
 * is clamped at cfg->grow_trunk so later trunks keep a constant size.
 */
static inline uint32_t
mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
	struct mlx5_indexed_pool_config *cfg = &pool->cfg;

	return cfg->trunk_size << (cfg->grow_shift *
	       (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
}
207
/*
 * First pool-wide entry index of the trunk at trunk_idx (the inverse of
 * mlx5_trunk_idx_get).  Uses the cumulative counts in pool->grow_tbl for
 * the growing region and constant-size arithmetic beyond it.
 */
static inline uint32_t
mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
{
	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
	uint32_t offset = 0;

	if (!trunk_idx)
		return 0;
	if (!cfg->grow_trunk)
		return cfg->trunk_size * trunk_idx;
	if (trunk_idx < cfg->grow_trunk)
		/* grow_tbl[i] already accumulates sizes of trunks 0..i. */
		offset = pool->grow_tbl[trunk_idx - 1];
	else
		offset = pool->grow_tbl[cfg->grow_trunk - 1] +
			 (cfg->trunk_size << (cfg->grow_shift *
			 cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
	return offset;
}
226
227 struct mlx5_indexed_pool *
228 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
229 {
230         struct mlx5_indexed_pool *pool;
231         uint32_t i;
232
233         if (!cfg || !cfg->size || (!cfg->malloc ^ !cfg->free) ||
234             (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
235             ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
236                 return NULL;
237         pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
238                            sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
239                            SOCKET_ID_ANY);
240         if (!pool)
241                 return NULL;
242         pool->cfg = *cfg;
243         if (!pool->cfg.trunk_size)
244                 pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
245         if (!cfg->malloc && !cfg->free) {
246                 pool->cfg.malloc = mlx5_malloc;
247                 pool->cfg.free = mlx5_free;
248         }
249         pool->free_list = TRUNK_INVALID;
250         if (pool->cfg.need_lock)
251                 rte_spinlock_init(&pool->lock);
252         /*
253          * Initialize the dynamic grow trunk size lookup table to have a quick
254          * lookup for the trunk entry index offset.
255          */
256         for (i = 0; i < cfg->grow_trunk; i++) {
257                 pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
258                 if (i > 0)
259                         pool->grow_tbl[i] += pool->grow_tbl[i - 1];
260         }
261         return pool;
262 }
263
/*
 * Allocate a new trunk for the pool and put it at the head of the free
 * list.  Called with the pool lock held (from mlx5_ipool_malloc) when no
 * trunk has free entries.
 *
 * Returns 0 on success, -ENOMEM when the trunk index space or memory is
 * exhausted.
 */
static int
mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
{
	struct mlx5_indexed_trunk *trunk;
	struct mlx5_indexed_trunk **trunk_tmp;
	struct mlx5_indexed_trunk **p;
	size_t trunk_size = 0;
	size_t data_size;
	size_t bmp_size;
	uint32_t idx;

	if (pool->n_trunk_valid == TRUNK_MAX_IDX)
		return -ENOMEM;
	if (pool->n_trunk_valid == pool->n_trunk) {
		/* No free trunk flags, expand trunk list. */
		int n_grow = pool->n_trunk_valid ? pool->n_trunk :
			     RTE_CACHE_LINE_SIZE / sizeof(void *);

		/* Grow the trunk pointer array (copy + zero the new tail). */
		p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
				     sizeof(struct mlx5_indexed_trunk *),
				     RTE_CACHE_LINE_SIZE, rte_socket_id());
		if (!p)
			return -ENOMEM;
		if (pool->trunks)
			memcpy(p, pool->trunks, pool->n_trunk_valid *
			       sizeof(struct mlx5_indexed_trunk *));
		memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
		       n_grow * sizeof(void *));
		trunk_tmp = pool->trunks;
		pool->trunks = p;
		if (trunk_tmp)
			pool->cfg.free(trunk_tmp);
		pool->n_trunk += n_grow;
	}
	if (!pool->cfg.release_mem_en) {
		/* Trunks are never released: valid count is the next slot. */
		idx = pool->n_trunk_valid;
	} else {
		/* Find the first available slot in trunk list */
		for (idx = 0; idx < pool->n_trunk; idx++)
			if (pool->trunks[idx] == NULL)
				break;
	}
	/* One allocation holds header, entry data and the free bitmap. */
	trunk_size += sizeof(*trunk);
	data_size = mlx5_trunk_size_get(pool, idx);
	bmp_size = rte_bitmap_get_memory_footprint(data_size);
	/* rte_bitmap requires memory cacheline aligned. */
	trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
	trunk_size += bmp_size;
	trunk = pool->cfg.malloc(0, trunk_size,
				 RTE_CACHE_LINE_SIZE, rte_socket_id());
	if (!trunk)
		return -ENOMEM;
	pool->trunks[idx] = trunk;
	trunk->idx = idx;
	trunk->free = data_size;
	trunk->prev = TRUNK_INVALID;
	trunk->next = TRUNK_INVALID;
	/* Grow is only called when the free list is empty. */
	MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
	pool->free_list = idx;
	/* Mark all entries as available. */
	trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
		     [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
		     bmp_size);
	MLX5_ASSERT(trunk->bmp);
	pool->n_trunk_valid++;
#ifdef POOL_DEBUG
	pool->trunk_new++;
	pool->trunk_avail++;
#endif
	return 0;
}
335
/*
 * Allocate one entry from the pool and return its address; the 1-based
 * pool-wide index is stored in *idx (0 is reserved as "no entry").
 *
 * Grows the pool with a new trunk when no trunk has free entries.  A trunk
 * that becomes full is unlinked from the free list.  Returns NULL on
 * allocation failure.
 */
void *
mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
{
	struct mlx5_indexed_trunk *trunk;
	uint64_t slab = 0;
	uint32_t iidx = 0;
	void *p;

	mlx5_ipool_lock(pool);
	if (pool->free_list == TRUNK_INVALID) {
		/* If no available trunks, grow new. */
		if (mlx5_ipool_grow(pool)) {
			mlx5_ipool_unlock(pool);
			return NULL;
		}
	}
	MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
	trunk = pool->trunks[pool->free_list];
	MLX5_ASSERT(trunk->free);
	/* Scan the trunk bitmap for a slab containing a set (free) bit. */
	if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
		mlx5_ipool_unlock(pool);
		return NULL;
	}
	MLX5_ASSERT(slab);
	/* Lowest set bit in the slab is the free entry within the trunk. */
	iidx += __builtin_ctzll(slab);
	MLX5_ASSERT(iidx != UINT32_MAX);
	MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
	/* Clear the bit: entry is now in use. */
	rte_bitmap_clear(trunk->bmp, iidx);
	p = &trunk->data[iidx * pool->cfg.size];
	/* Convert trunk-local index to 1-based pool-wide index. */
	iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
	iidx += 1; /* non-zero index. */
	trunk->free--;
#ifdef POOL_DEBUG
	pool->n_entry++;
#endif
	if (!trunk->free) {
		/* Full trunk will be removed from free list in imalloc. */
		MLX5_ASSERT(pool->free_list == trunk->idx);
		pool->free_list = trunk->next;
		if (trunk->next != TRUNK_INVALID)
			pool->trunks[trunk->next]->prev = TRUNK_INVALID;
		trunk->prev = TRUNK_INVALID;
		trunk->next = TRUNK_INVALID;
#ifdef POOL_DEBUG
		pool->trunk_empty++;
		pool->trunk_avail--;
#endif
	}
	*idx = iidx;
	mlx5_ipool_unlock(pool);
	return p;
}
388
389 void *
390 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
391 {
392         void *entry = mlx5_ipool_malloc(pool, idx);
393
394         if (entry)
395                 memset(entry, 0, pool->cfg.size);
396         return entry;
397 }
398
/*
 * Return the entry with the given 1-based index to the pool.
 *
 * Sets the entry's bitmap bit back to "free".  When release_mem_en is set
 * and the trunk becomes fully free, the trunk (and, once no trunk is left,
 * the trunk pointer array) is released back to the allocator.  A trunk
 * that gains its first free entry is linked back at the free list head.
 * Invalid or stale indexes are silently ignored.
 */
void
mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
{
	struct mlx5_indexed_trunk *trunk;
	uint32_t trunk_idx;
	uint32_t entry_idx;

	if (!idx)
		return;
	idx -= 1;
	mlx5_ipool_lock(pool);
	trunk_idx = mlx5_trunk_idx_get(pool, idx);
	/* With released trunks, slots up to n_trunk may hold valid trunks. */
	if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
	    (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
		goto out;
	trunk = pool->trunks[trunk_idx];
	if (!trunk)
		goto out;
	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
	/* A set bit means the entry is already free: reject double free. */
	if (trunk_idx != trunk->idx ||
	    rte_bitmap_get(trunk->bmp, entry_idx))
		goto out;
	rte_bitmap_set(trunk->bmp, entry_idx);
	trunk->free++;
	if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
	   (pool, trunk->idx)) {
		/* Trunk is entirely free: unlink it and release its memory. */
		if (pool->free_list == trunk->idx)
			pool->free_list = trunk->next;
		if (trunk->next != TRUNK_INVALID)
			pool->trunks[trunk->next]->prev = trunk->prev;
		if (trunk->prev != TRUNK_INVALID)
			pool->trunks[trunk->prev]->next = trunk->next;
		pool->cfg.free(trunk);
		pool->trunks[trunk_idx] = NULL;
		pool->n_trunk_valid--;
#ifdef POOL_DEBUG
		pool->trunk_avail--;
		pool->trunk_free++;
#endif
		if (pool->n_trunk_valid == 0) {
			pool->cfg.free(pool->trunks);
			pool->trunks = NULL;
			pool->n_trunk = 0;
		}
	} else if (trunk->free == 1) {
		/* Put into free trunk list head. */
		MLX5_ASSERT(pool->free_list != trunk->idx);
		trunk->next = pool->free_list;
		trunk->prev = TRUNK_INVALID;
		if (pool->free_list != TRUNK_INVALID)
			pool->trunks[pool->free_list]->prev = trunk->idx;
		pool->free_list = trunk->idx;
#ifdef POOL_DEBUG
		pool->trunk_empty--;
		pool->trunk_avail++;
#endif
	}
#ifdef POOL_DEBUG
	pool->n_entry--;
#endif
out:
	mlx5_ipool_unlock(pool);
}
462
463 void *
464 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
465 {
466         struct mlx5_indexed_trunk *trunk;
467         void *p = NULL;
468         uint32_t trunk_idx;
469         uint32_t entry_idx;
470
471         if (!idx)
472                 return NULL;
473         idx -= 1;
474         mlx5_ipool_lock(pool);
475         trunk_idx = mlx5_trunk_idx_get(pool, idx);
476         if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
477             (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
478                 goto out;
479         trunk = pool->trunks[trunk_idx];
480         if (!trunk)
481                 goto out;
482         entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
483         if (trunk_idx != trunk->idx ||
484             rte_bitmap_get(trunk->bmp, entry_idx))
485                 goto out;
486         p = &trunk->data[entry_idx * pool->cfg.size];
487 out:
488         mlx5_ipool_unlock(pool);
489         return p;
490 }
491
492 int
493 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
494 {
495         struct mlx5_indexed_trunk **trunks;
496         uint32_t i;
497
498         MLX5_ASSERT(pool);
499         mlx5_ipool_lock(pool);
500         trunks = pool->trunks;
501         for (i = 0; i < pool->n_trunk; i++) {
502                 if (trunks[i])
503                         pool->cfg.free(trunks[i]);
504         }
505         if (!pool->trunks)
506                 pool->cfg.free(pool->trunks);
507         mlx5_ipool_unlock(pool);
508         mlx5_free(pool);
509         return 0;
510 }
511
512 void
513 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
514 {
515         printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
516                "total: %d\n",
517                pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
518                pool->cfg.trunk_size, pool->n_trunk_valid);
519 #ifdef POOL_DEBUG
520         printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
521                "available %u free %u\n",
522                pool->cfg.type, pool->n_entry, pool->trunk_new,
523                pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
524 #endif
525 }
526
527 struct mlx5_l3t_tbl *
528 mlx5_l3t_create(enum mlx5_l3t_type type)
529 {
530         struct mlx5_l3t_tbl *tbl;
531         struct mlx5_indexed_pool_config l3t_ip_cfg = {
532                 .trunk_size = 16,
533                 .grow_trunk = 6,
534                 .grow_shift = 1,
535                 .need_lock = 0,
536                 .release_mem_en = 1,
537                 .malloc = mlx5_malloc,
538                 .free = mlx5_free,
539         };
540
541         if (type >= MLX5_L3T_TYPE_MAX) {
542                 rte_errno = EINVAL;
543                 return NULL;
544         }
545         tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
546                           SOCKET_ID_ANY);
547         if (!tbl) {
548                 rte_errno = ENOMEM;
549                 return NULL;
550         }
551         tbl->type = type;
552         switch (type) {
553         case MLX5_L3T_TYPE_WORD:
554                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
555                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
556                 break;
557         case MLX5_L3T_TYPE_DWORD:
558                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
559                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
560                 break;
561         case MLX5_L3T_TYPE_QWORD:
562                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
563                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
564                 break;
565         default:
566                 l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
567                 l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
568                 break;
569         }
570         rte_spinlock_init(&tbl->sl);
571         tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
572         if (!tbl->eip) {
573                 rte_errno = ENOMEM;
574                 mlx5_free(tbl);
575                 tbl = NULL;
576         }
577         return tbl;
578 }
579
/*
 * Destroy a three-level table.
 *
 * Walks the global table's middle tables and their entry tables, returning
 * each entry table to the indexed pool and freeing each middle table.  The
 * loops stop early once the parent's ref_cnt (the count of live children)
 * drops to zero, since remaining slots must then be empty.  Finally the
 * entry pool and the table object are released.
 */
void
mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
{
	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
	uint32_t i, j;

	if (!tbl)
		return;
	g_tbl = tbl->tbl;
	if (g_tbl) {
		for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
			m_tbl = g_tbl->tbl[i];
			if (!m_tbl)
				continue;
			for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
				if (!m_tbl->tbl[j])
					continue;
				/*
				 * All entry-table layouts start with the same
				 * header, so the word layout is used to reach
				 * ref_cnt/idx regardless of tbl->type.
				 */
				MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
					    m_tbl->tbl[j])->ref_cnt);
				mlx5_ipool_free(tbl->eip,
						((struct mlx5_l3t_entry_word *)
						m_tbl->tbl[j])->idx);
				m_tbl->tbl[j] = 0;
				if (!(--m_tbl->ref_cnt))
					break;
			}
			MLX5_ASSERT(!m_tbl->ref_cnt);
			mlx5_free(g_tbl->tbl[i]);
			g_tbl->tbl[i] = 0;
			if (!(--g_tbl->ref_cnt))
				break;
		}
		MLX5_ASSERT(!g_tbl->ref_cnt);
		mlx5_free(tbl->tbl);
		tbl->tbl = 0;
	}
	mlx5_ipool_destroy(tbl->eip);
	mlx5_free(tbl);
}
619
/*
 * Look up the entry at idx and copy its data into *data, taking a new
 * reference on the entry when its data is non-zero/non-NULL.
 *
 * Caller must hold tbl->sl (see mlx5_l3t_get_entry).  Returns 0 when the
 * entry table exists (even if the slot's data is empty), -1 when any of
 * the three table levels is missing.
 */
static int32_t
__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
		union mlx5_l3t_data *data)
{
	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
	struct mlx5_l3t_entry_word *w_e_tbl;
	struct mlx5_l3t_entry_dword *dw_e_tbl;
	struct mlx5_l3t_entry_qword *qw_e_tbl;
	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
	void *e_tbl;
	uint32_t entry_idx;

	/* Descend global -> middle -> entry table by idx bit-fields. */
	g_tbl = tbl->tbl;
	if (!g_tbl)
		return -1;
	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
	if (!m_tbl)
		return -1;
	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
	if (!e_tbl)
		return -1;
	entry_idx = idx & MLX5_L3T_ET_MASK;
	/* Access the slot through the layout matching the table type. */
	switch (tbl->type) {
	case MLX5_L3T_TYPE_WORD:
		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
		data->word = w_e_tbl->entry[entry_idx].data;
		if (w_e_tbl->entry[entry_idx].data)
			w_e_tbl->entry[entry_idx].ref_cnt++;
		break;
	case MLX5_L3T_TYPE_DWORD:
		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
		data->dword = dw_e_tbl->entry[entry_idx].data;
		if (dw_e_tbl->entry[entry_idx].data)
			dw_e_tbl->entry[entry_idx].ref_cnt++;
		break;
	case MLX5_L3T_TYPE_QWORD:
		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
		data->qword = qw_e_tbl->entry[entry_idx].data;
		if (qw_e_tbl->entry[entry_idx].data)
			qw_e_tbl->entry[entry_idx].ref_cnt++;
		break;
	default:
		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
		data->ptr = ptr_e_tbl->entry[entry_idx].data;
		if (ptr_e_tbl->entry[entry_idx].data)
			ptr_e_tbl->entry[entry_idx].ref_cnt++;
		break;
	}
	return 0;
}
670
671 int32_t
672 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
673                    union mlx5_l3t_data *data)
674 {
675         int ret;
676
677         rte_spinlock_lock(&tbl->sl);
678         ret = __l3t_get_entry(tbl, idx, data);
679         rte_spinlock_unlock(&tbl->sl);
680         return ret;
681 }
682
/*
 * Drop one reference from the entry at idx, clearing its data when the
 * entry refcount reaches zero.  When the containing entry table then has
 * no used entries left, it is returned to the pool, and empty middle /
 * global tables are freed in cascade.
 *
 * Serialized with the table spinlock.  Returns the entry's remaining
 * reference count, or -1 when any table level along the path is missing.
 */
int32_t
mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
{
	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
	struct mlx5_l3t_entry_word *w_e_tbl;
	struct mlx5_l3t_entry_dword *dw_e_tbl;
	struct mlx5_l3t_entry_qword *qw_e_tbl;
	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
	void *e_tbl;
	uint32_t entry_idx;
	uint64_t ref_cnt;
	int32_t ret = -1;

	rte_spinlock_lock(&tbl->sl);
	/* Descend global -> middle -> entry table by idx bit-fields. */
	g_tbl = tbl->tbl;
	if (!g_tbl)
		goto out;
	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
	if (!m_tbl)
		goto out;
	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
	if (!e_tbl)
		goto out;
	entry_idx = idx & MLX5_L3T_ET_MASK;
	/*
	 * Per type: drop one entry reference; only when it hits zero is the
	 * data cleared and the entry-table use count (ref_cnt) decremented.
	 */
	switch (tbl->type) {
	case MLX5_L3T_TYPE_WORD:
		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
		MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
		ret = --w_e_tbl->entry[entry_idx].ref_cnt;
		if (ret)
			goto out;
		w_e_tbl->entry[entry_idx].data = 0;
		ref_cnt = --w_e_tbl->ref_cnt;
		break;
	case MLX5_L3T_TYPE_DWORD:
		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
		MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
		ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
		if (ret)
			goto out;
		dw_e_tbl->entry[entry_idx].data = 0;
		ref_cnt = --dw_e_tbl->ref_cnt;
		break;
	case MLX5_L3T_TYPE_QWORD:
		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
		MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
		ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
		if (ret)
			goto out;
		qw_e_tbl->entry[entry_idx].data = 0;
		ref_cnt = --qw_e_tbl->ref_cnt;
		break;
	default:
		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
		MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
		ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
		if (ret)
			goto out;
		ptr_e_tbl->entry[entry_idx].data = NULL;
		ref_cnt = --ptr_e_tbl->ref_cnt;
		break;
	}
	if (!ref_cnt) {
		/*
		 * Entry table is empty: free it back to the pool (all entry
		 * layouts share the leading idx field), then unwind empty
		 * parent tables.
		 */
		mlx5_ipool_free(tbl->eip,
				((struct mlx5_l3t_entry_word *)e_tbl)->idx);
		m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
									NULL;
		if (!(--m_tbl->ref_cnt)) {
			mlx5_free(m_tbl);
			g_tbl->tbl
			[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
			if (!(--g_tbl->ref_cnt)) {
				mlx5_free(g_tbl);
				tbl->tbl = 0;
			}
		}
	}
out:
	rte_spinlock_unlock(&tbl->sl);
	return ret;
}
764
765 static int32_t
766 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
767                 union mlx5_l3t_data *data)
768 {
769         struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
770         struct mlx5_l3t_entry_word *w_e_tbl;
771         struct mlx5_l3t_entry_dword *dw_e_tbl;
772         struct mlx5_l3t_entry_qword *qw_e_tbl;
773         struct mlx5_l3t_entry_ptr *ptr_e_tbl;
774         void *e_tbl;
775         uint32_t entry_idx, tbl_idx = 0;
776
777         /* Check the global table, create it if empty. */
778         g_tbl = tbl->tbl;
779         if (!g_tbl) {
780                 g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
781                                     sizeof(struct mlx5_l3t_level_tbl) +
782                                     sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
783                                     SOCKET_ID_ANY);
784                 if (!g_tbl) {
785                         rte_errno = ENOMEM;
786                         return -1;
787                 }
788                 tbl->tbl = g_tbl;
789         }
790         /*
791          * Check the middle table, create it if empty. Ref_cnt will be
792          * increased if new sub table created.
793          */
794         m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
795         if (!m_tbl) {
796                 m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
797                                     sizeof(struct mlx5_l3t_level_tbl) +
798                                     sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
799                                     SOCKET_ID_ANY);
800                 if (!m_tbl) {
801                         rte_errno = ENOMEM;
802                         return -1;
803                 }
804                 g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
805                                                                         m_tbl;
806                 g_tbl->ref_cnt++;
807         }
808         /*
809          * Check the entry table, create it if empty. Ref_cnt will be
810          * increased if new sub entry table created.
811          */
812         e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
813         if (!e_tbl) {
814                 e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
815                 if (!e_tbl) {
816                         rte_errno = ENOMEM;
817                         return -1;
818                 }
819                 ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
820                 m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
821                                                                         e_tbl;
822                 m_tbl->ref_cnt++;
823         }
824         entry_idx = idx & MLX5_L3T_ET_MASK;
825         switch (tbl->type) {
826         case MLX5_L3T_TYPE_WORD:
827                 w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
828                 if (w_e_tbl->entry[entry_idx].data) {
829                         data->word = w_e_tbl->entry[entry_idx].data;
830                         w_e_tbl->entry[entry_idx].ref_cnt++;
831                         rte_errno = EEXIST;
832                         return -1;
833                 }
834                 w_e_tbl->entry[entry_idx].data = data->word;
835                 w_e_tbl->entry[entry_idx].ref_cnt = 1;
836                 w_e_tbl->ref_cnt++;
837                 break;
838         case MLX5_L3T_TYPE_DWORD:
839                 dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
840                 if (dw_e_tbl->entry[entry_idx].data) {
841                         data->dword = dw_e_tbl->entry[entry_idx].data;
842                         dw_e_tbl->entry[entry_idx].ref_cnt++;
843                         rte_errno = EEXIST;
844                         return -1;
845                 }
846                 dw_e_tbl->entry[entry_idx].data = data->dword;
847                 dw_e_tbl->entry[entry_idx].ref_cnt = 1;
848                 dw_e_tbl->ref_cnt++;
849                 break;
850         case MLX5_L3T_TYPE_QWORD:
851                 qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
852                 if (qw_e_tbl->entry[entry_idx].data) {
853                         data->qword = qw_e_tbl->entry[entry_idx].data;
854                         qw_e_tbl->entry[entry_idx].ref_cnt++;
855                         rte_errno = EEXIST;
856                         return -1;
857                 }
858                 qw_e_tbl->entry[entry_idx].data = data->qword;
859                 qw_e_tbl->entry[entry_idx].ref_cnt = 1;
860                 qw_e_tbl->ref_cnt++;
861                 break;
862         default:
863                 ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
864                 if (ptr_e_tbl->entry[entry_idx].data) {
865                         data->ptr = ptr_e_tbl->entry[entry_idx].data;
866                         ptr_e_tbl->entry[entry_idx].ref_cnt++;
867                         rte_errno = EEXIST;
868                         return -1;
869                 }
870                 ptr_e_tbl->entry[entry_idx].data = data->ptr;
871                 ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
872                 ptr_e_tbl->ref_cnt++;
873                 break;
874         }
875         return 0;
876 }
877
878 int32_t
879 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
880                    union mlx5_l3t_data *data)
881 {
882         int ret;
883
884         rte_spinlock_lock(&tbl->sl);
885         ret = __l3t_set_entry(tbl, idx, data);
886         rte_spinlock_unlock(&tbl->sl);
887         return ret;
888 }
889
890 int32_t
891 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
892                        union mlx5_l3t_data *data,
893                        mlx5_l3t_alloc_callback_fn cb, void *ctx)
894 {
895         int32_t ret;
896
897         rte_spinlock_lock(&tbl->sl);
898         /* Check if entry data is ready. */
899         ret = __l3t_get_entry(tbl, idx, data);
900         if (!ret) {
901                 switch (tbl->type) {
902                 case MLX5_L3T_TYPE_WORD:
903                         if (data->word)
904                                 goto out;
905                         break;
906                 case MLX5_L3T_TYPE_DWORD:
907                         if (data->dword)
908                                 goto out;
909                         break;
910                 case MLX5_L3T_TYPE_QWORD:
911                         if (data->qword)
912                                 goto out;
913                         break;
914                 default:
915                         if (data->ptr)
916                                 goto out;
917                         break;
918                 }
919         }
920         /* Entry data is not ready, use user callback to create it. */
921         ret = cb(ctx, data);
922         if (ret)
923                 goto out;
924         /* Save the new allocated data to entry. */
925         ret = __l3t_set_entry(tbl, idx, data);
926 out:
927         rte_spinlock_unlock(&tbl->sl);
928         return ret;
929 }