net/mlx5: add indexed memory pool
authorSuanming Mou <suanmingm@mellanox.com>
Thu, 16 Apr 2020 02:41:59 +0000 (10:41 +0800)
committerFerruh Yigit <ferruh.yigit@intel.com>
Tue, 21 Apr 2020 11:57:09 +0000 (13:57 +0200)
Currently, memory allocated by rte_malloc() carries more than 64 bytes
of overhead per allocation. This means that when allocating 64 bytes of
memory, the real memory cost may be double. The libc malloc() overhead
is 16 bytes. If users allocate millions of small memory blocks, the
total overhead may be huge. Saving the memory pointers will also be
quite expensive.

The indexed memory pool is introduced to save memory when allocating a
huge number of small memory blocks. The indexed memory pool uses trunks
and bitmaps to manage the memory entries. When the pool has no free
entry, a trunk slot containing an array of memory entries is allocated
first. The bitmap in the trunk records the entry allocation. The offset
of the trunk slot in the pool and the offset of the memory entry in the
trunk slot compose the index of the memory entry. So, given the index,
it is very easy to address the memory of the entry. The user saves the
32-bit index of the memory resource instead of the 64-bit pointer.
The user should create different pools for allocating different sizes
of small memory blocks. That is, one pool provides allocation of one
fixed size of small memory block.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
drivers/net/mlx5/mlx5_utils.c
drivers/net/mlx5/mlx5_utils.h

index 4b4fc3c..c92914c 100644 (file)
@@ -117,3 +117,264 @@ mlx5_hlist_destroy(struct mlx5_hlist *h,
        }
        rte_free(h);
 }
+
+/* Acquire the pool spinlock; a no-op for pools created without need_lock. */
+static inline void
+mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
+{
+       if (!pool->cfg.need_lock)
+               return;
+       rte_spinlock_lock(&pool->lock);
+}
+
+/* Release the pool spinlock; a no-op for pools created without need_lock. */
+static inline void
+mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
+{
+       if (!pool->cfg.need_lock)
+               return;
+       rte_spinlock_unlock(&pool->lock);
+}
+
+/*
+ * Create an indexed pool from a caller supplied configuration.
+ *
+ * The configuration is copied into the pool. A zero trunk_size is replaced
+ * by MLX5_IPOOL_DEFAULT_TRUNK_SIZE, and when no allocator callbacks are
+ * given rte_malloc_socket()/rte_free() are used.
+ *
+ * Returns the new pool, or NULL when the configuration is rejected or the
+ * pool structure cannot be allocated.
+ */
+struct mlx5_indexed_pool *
+mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
+{
+       struct mlx5_indexed_pool *ipool;
+
+       if (!cfg || !cfg->size)
+               return NULL;
+       /* The allocator callbacks come as a pair: both set or both unset. */
+       if (!cfg->malloc != !cfg->free)
+               return NULL;
+       if (cfg->trunk_size) {
+               /* A non-default trunk size has to be a power of two. */
+               if (cfg->trunk_size & (cfg->trunk_size - 1))
+                       return NULL;
+               /* ffs() position plus trunk index bits must not exceed 32. */
+               if (__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS > 32)
+                       return NULL;
+       }
+       ipool = rte_zmalloc("mlx5_ipool", sizeof(*ipool),
+                           RTE_CACHE_LINE_SIZE);
+       if (!ipool)
+               return NULL;
+       ipool->cfg = *cfg;
+       if (!ipool->cfg.trunk_size)
+               ipool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
+       /* Both callbacks are unset here whenever malloc is unset. */
+       if (!cfg->malloc) {
+               ipool->cfg.malloc = rte_malloc_socket;
+               ipool->cfg.free = rte_free;
+       }
+       ipool->free_list = TRUNK_INVALID;
+       if (ipool->cfg.need_lock)
+               rte_spinlock_init(&ipool->lock);
+       return ipool;
+}
+
+/*
+ * Add one new trunk to the pool and make it the head of the free list.
+ *
+ * The trunk pointer array is enlarged first when all of its slots are in
+ * use. The new trunk is a single allocation holding the trunk header, the
+ * entry array and, right after the entries, the bitmap memory.
+ *
+ * The visible caller invokes this with the pool lock held and only when the
+ * free list is empty (asserted below).
+ *
+ * Returns 0 on success, -ENOMEM when the trunk count reached TRUNK_MAX_IDX
+ * or when an allocation fails.
+ */
+static int
+mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
+{
+       struct mlx5_indexed_trunk *trunk;
+       struct mlx5_indexed_trunk **trunk_tmp;
+       struct mlx5_indexed_trunk **p;
+       size_t trunk_size = 0;
+       size_t bmp_size;
+       uint32_t idx;
+
+       if (pool->n_trunk_valid == TRUNK_MAX_IDX)
+               return -ENOMEM;
+       if (pool->n_trunk_valid == pool->n_trunk) {
+               /* No free trunk flags, expand trunk list. */
+               /*
+                * Double the pointer array, or start with one cache line
+                * worth of pointers for the very first trunk.
+                */
+               int n_grow = pool->n_trunk_valid ? pool->n_trunk :
+                            RTE_CACHE_LINE_SIZE / sizeof(void *);
+
+               p = pool->cfg.malloc(pool->cfg.type,
+                                (pool->n_trunk_valid + n_grow) *
+                                sizeof(struct mlx5_indexed_trunk *),
+                                RTE_CACHE_LINE_SIZE, rte_socket_id());
+               if (!p)
+                       return -ENOMEM;
+               /* Carry over the existing trunk pointers. */
+               if (pool->trunks)
+                       memcpy(p, pool->trunks, pool->n_trunk_valid *
+                              sizeof(struct mlx5_indexed_trunk *));
+               /* Zero the newly added slots so unused ones read as NULL. */
+               memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
+                      n_grow * sizeof(void *));
+               /* Publish the new array before releasing the old one. */
+               trunk_tmp = pool->trunks;
+               pool->trunks = p;
+               if (trunk_tmp)
+                       pool->cfg.free(trunk_tmp);
+               pool->n_trunk += n_grow;
+       }
+       idx = pool->n_trunk_valid;
+       /* Trunk allocation size: header + entry array + bitmap footprint. */
+       trunk_size += sizeof(*trunk);
+       bmp_size = rte_bitmap_get_memory_footprint(pool->cfg.trunk_size);
+       trunk_size += pool->cfg.trunk_size * pool->cfg.size + bmp_size;
+       trunk = pool->cfg.malloc(pool->cfg.type, trunk_size,
+                                RTE_CACHE_LINE_SIZE, rte_socket_id());
+       if (!trunk)
+               return -ENOMEM;
+       pool->trunks[idx] = trunk;
+       trunk->idx = idx;
+       trunk->free = pool->cfg.trunk_size;
+       trunk->prev = TRUNK_INVALID;
+       trunk->next = TRUNK_INVALID;
+       /* The new trunk becomes the only member of the free list. */
+       MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
+       pool->free_list = idx;
+       /* Mark all entries as available. */
+       /*
+        * The bitmap lives in the same allocation, right behind the entries.
+        * NOTE(review): the return value of the bitmap init is not checked.
+        */
+       trunk->bmp = rte_bitmap_init_with_all_set(pool->cfg.trunk_size,
+                    &trunk->data[pool->cfg.trunk_size  * pool->cfg.size],
+                    bmp_size);
+       pool->n_trunk_valid++;
+#ifdef POOL_DEBUG
+       pool->trunk_new++;
+       pool->trunk_avail++;
+#endif
+       return 0;
+}
+
+/*
+ * Allocate one entry from the pool.
+ *
+ * Takes the entry from the trunk at the head of the free list, growing the
+ * pool by one trunk first when the free list is empty. On success the entry
+ * index is stored in *idx; it is built as
+ * trunk index * trunk_size + offset in trunk + 1, so it is never zero.
+ *
+ * Returns the entry address (contents not initialized), or NULL when the
+ * pool cannot grow or the bitmap scan finds no available entry. *idx is
+ * left untouched on failure.
+ */
+void *
+mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
+{
+       struct mlx5_indexed_trunk *trunk;
+       uint64_t slab = 0;
+       uint32_t iidx = 0;
+       void *p;
+
+       mlx5_ipool_lock(pool);
+       if (pool->free_list == TRUNK_INVALID) {
+               /* If no available trunks, grow new. */
+               if (mlx5_ipool_grow(pool)) {
+                       mlx5_ipool_unlock(pool);
+                       return NULL;
+               }
+       }
+       MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
+       trunk = pool->trunks[pool->free_list];
+       MLX5_ASSERT(trunk->free);
+       /* A set bit marks an available entry. */
+       if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
+               mlx5_ipool_unlock(pool);
+               return NULL;
+       }
+       MLX5_ASSERT(slab);
+       /* Advance to the lowest set bit of the slab reported by the scan. */
+       iidx += __builtin_ctzll(slab);
+       MLX5_ASSERT(iidx != UINT32_MAX);
+       MLX5_ASSERT(iidx < pool->cfg.trunk_size);
+       /* Clearing the bit marks the entry as allocated. */
+       rte_bitmap_clear(trunk->bmp, iidx);
+       p = &trunk->data[iidx * pool->cfg.size];
+       /* Turn the in-trunk offset into the pool-wide index. */
+       iidx += trunk->idx * pool->cfg.trunk_size;
+       iidx += 1; /* non-zero index. */
+       trunk->free--;
+#ifdef POOL_DEBUG
+       pool->n_entry++;
+#endif
+       if (!trunk->free) {
+               /* Full trunk will be removed from free list in imalloc. */
+               /* The trunk is the list head, so only the head is unlinked. */
+               MLX5_ASSERT(pool->free_list == trunk->idx);
+               pool->free_list = trunk->next;
+               if (trunk->next != TRUNK_INVALID)
+                       pool->trunks[trunk->next]->prev = TRUNK_INVALID;
+               trunk->prev = TRUNK_INVALID;
+               trunk->next = TRUNK_INVALID;
+#ifdef POOL_DEBUG
+               pool->trunk_empty++;
+               pool->trunk_avail--;
+#endif
+       }
+       *idx = iidx;
+       mlx5_ipool_unlock(pool);
+       return p;
+}
+
+/*
+ * Allocate one entry through mlx5_ipool_malloc() and clear its
+ * pool->cfg.size bytes. Returns NULL when the allocation fails.
+ */
+void *
+mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
+{
+       void *mem = mlx5_ipool_malloc(pool, idx);
+
+       if (!mem)
+               return NULL;
+       memset(mem, 0, pool->cfg.size);
+       return mem;
+}
+
+void
+mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
+{
+       struct mlx5_indexed_trunk *trunk;
+       uint32_t trunk_idx;
+
+       if (!idx)
+               return;
+       idx -= 1;
+       mlx5_ipool_lock(pool);
+       trunk_idx = idx / pool->cfg.trunk_size;
+       if (trunk_idx >= pool->n_trunk_valid)
+               goto out;
+       trunk = pool->trunks[trunk_idx];
+       if (!trunk || trunk_idx != trunk->idx ||
+           rte_bitmap_get(trunk->bmp, idx % pool->cfg.trunk_size))
+               goto out;
+       rte_bitmap_set(trunk->bmp, idx % pool->cfg.trunk_size);
+       trunk->free++;
+       if (trunk->free == 1) {
+               /* Put into free trunk list head. */
+               MLX5_ASSERT(pool->free_list != trunk->idx);
+               trunk->next = pool->free_list;
+               trunk->prev = TRUNK_INVALID;
+               if (pool->free_list != TRUNK_INVALID)
+                       pool->trunks[pool->free_list]->prev = trunk->idx;
+               pool->free_list = trunk->idx;
+#ifdef POOL_DEBUG
+               pool->trunk_empty--;
+               pool->trunk_avail++;
+#endif
+       }
+#ifdef POOL_DEBUG
+       pool->n_entry--;
+#endif
+out:
+       mlx5_ipool_unlock(pool);
+}
+
+/*
+ * Translate an entry index into the entry address.
+ *
+ * Returns NULL when idx is 0, when it addresses a trunk that does not
+ * exist, or when the entry is marked available (not allocated) in the trunk
+ * bitmap.
+ */
+void *
+mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
+{
+       struct mlx5_indexed_trunk *trk;
+       void *entry = NULL;
+       uint32_t tidx;
+       uint32_t off;
+
+       if (idx == 0)
+               return NULL;
+       /* Indexes handed out are offset by one; undo that first. */
+       idx--;
+       mlx5_ipool_lock(pool);
+       tidx = idx / pool->cfg.trunk_size;
+       if (tidx < pool->n_trunk_valid) {
+               trk = pool->trunks[tidx];
+               if (trk && tidx == trk->idx) {
+                       off = idx % pool->cfg.trunk_size;
+                       /* A clear bit means the entry is allocated. */
+                       if (!rte_bitmap_get(trk->bmp, off))
+                               entry = &trk->data[off * pool->cfg.size];
+               }
+       }
+       mlx5_ipool_unlock(pool);
+       return entry;
+}
+
+/*
+ * Release every trunk, the trunk pointer array and the pool itself.
+ *
+ * Trunks and the pointer array are released through the pool's configured
+ * free callback; the pool structure is released with rte_free().
+ * The caller must not use the pool, or any index or pointer obtained from
+ * it, after this call.
+ *
+ * Always returns 0.
+ */
+int
+mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
+{
+       struct mlx5_indexed_trunk **trunks;
+       uint32_t i;
+
+       MLX5_ASSERT(pool);
+       mlx5_ipool_lock(pool);
+       trunks = pool->trunks;
+       /* n_trunk is 0 while the array is unallocated, so no NULL access. */
+       for (i = 0; i < pool->n_trunk; i++) {
+               if (trunks[i])
+                       pool->cfg.free(trunks[i]);
+       }
+       /* Free the pointer array only when it was actually allocated. */
+       if (pool->trunks)
+               pool->cfg.free(pool->trunks);
+       mlx5_ipool_unlock(pool);
+       rte_free(pool);
+       return 0;
+}
+
+/*
+ * Print the pool configuration and trunk count to stdout; with POOL_DEBUG
+ * defined the debug counters are printed as well.
+ *
+ * All numeric arguments are uint32_t, so unsigned conversions are used.
+ * NOTE(review): the "total" field prints the number of valid trunks, the
+ * same value as the "trunks" field - confirm whether an entry total was
+ * intended.
+ */
+void
+mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
+{
+       printf("Pool %s entry size %u, trunks %u, %u entry per trunk, "
+              "total: %u\n",
+              pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
+              pool->cfg.trunk_size, pool->n_trunk_valid);
+#ifdef POOL_DEBUG
+       printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
+              "available %u free %u\n",
+              pool->cfg.type, pool->n_entry, pool->trunk_new,
+              pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
+#endif
+}
index 8f305c3..f11d6cc 100644 (file)
 #include <limits.h>
 #include <errno.h>
 
+#include <rte_spinlock.h>
+#include <rte_memory.h>
+#include <rte_bitmap.h>
+
 #include <mlx5_common.h>
 
 #include "mlx5_defs.h"
@@ -60,6 +64,60 @@ extern int mlx5_logtype;
         (((val) & (from)) / ((from) / (to))) : \
         (((val) & (from)) * ((to) / (from))))
 
+/*
+ * The indexed memory entry index is made up of the trunk index and the
+ * offset of the entry in the trunk. Since the entry index is 32 bits, a
+ * user who prefers small trunks can change the macro below to a bigger
+ * number, which lets the pool contain more trunks when lots of entries are
+ * allocated.
+ */
+#define TRUNK_IDX_BITS 16
+/* Largest trunk index value; the pool stops growing at this trunk count. */
+#define TRUNK_MAX_IDX ((1 << TRUNK_IDX_BITS) - 1)
+/* Sentinel trunk index meaning "no trunk" in the free trunk list links. */
+#define TRUNK_INVALID TRUNK_MAX_IDX
+/* Entries per trunk used when the configuration leaves trunk_size at 0. */
+#define MLX5_IPOOL_DEFAULT_TRUNK_SIZE (1 << (28 - TRUNK_IDX_BITS))
+/* The pool statistics counters are compiled in only for debug builds. */
+#ifdef RTE_LIBRTE_MLX5_DEBUG
+#define POOL_DEBUG 1
+#endif
+
+/* Creation parameters of an indexed pool; copied into the pool on create. */
+struct mlx5_indexed_pool_config {
+       uint32_t size; /* Pool entry size. */
+       uint32_t trunk_size;
+       /*
+        * Number of entries per trunk. Must be a power of 2.
+        * 0 selects MLX5_IPOOL_DEFAULT_TRUNK_SIZE.
+        */
+       uint32_t need_lock;
+       /* Non-zero when the pool is used by multiple threads and must lock. */
+       const char *type; /* Memory allocate type name. */
+       void *(*malloc)(const char *type, size_t size, unsigned int align,
+                       int socket);
+       /*
+        * User defined memory allocator. Must be set together with free;
+        * when both are NULL, rte_malloc_socket() and rte_free() are used.
+        */
+       void (*free)(void *addr); /* User defined memory release. */
+};
+
+/*
+ * One trunk: a header followed, in the same allocation, by the entry array
+ * and then the memory backing the allocation bitmap.
+ */
+struct mlx5_indexed_trunk {
+       uint32_t idx; /* Trunk id. */
+       uint32_t prev; /* Previous free trunk in free list. */
+       uint32_t next; /* Next free trunk in free list. */
+       uint32_t free; /* Free entries available */
+       struct rte_bitmap *bmp; /* Entry bitmap; a set bit is a free entry. */
+       uint8_t data[] __rte_cache_min_aligned; /* Entry data start. */
+};
+
+/* Indexed pool state: configuration, trunk table and free trunk list. */
+struct mlx5_indexed_pool {
+       struct mlx5_indexed_pool_config cfg; /* Indexed pool configuration. */
+       rte_spinlock_t lock; /* Pool lock for multiple thread usage. */
+       uint32_t n_trunk_valid; /* Trunks allocated. */
+       uint32_t n_trunk; /* Trunk pointer array size. */
+       /* Trunk pointer array; unused slots are NULL. */
+       struct mlx5_indexed_trunk **trunks;
+       uint32_t free_list; /* Index to first free trunk. */
+#ifdef POOL_DEBUG
+       /* Debug statistics. */
+       uint32_t n_entry; /* Entries currently allocated. */
+       uint32_t trunk_new; /* Trunks created. */
+       uint32_t trunk_avail; /* Trunks having at least one free entry. */
+       uint32_t trunk_empty; /* Trunks having no free entry left. */
+       uint32_t trunk_free; /* Only printed by the dump in the visible code. */
+#endif
+};
+
 /**
  * Return logarithm of the nearest power of two above input value.
  *
@@ -183,4 +241,175 @@ void mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
 void mlx5_hlist_destroy(struct mlx5_hlist *h,
                        mlx5_hlist_destroy_callback_fn cb, void *ctx);
 
+/**
+ * This function allocates a non-initialized memory entry from the pool.
+ * Entries are carved out of trunks; a trunk is allocated on the NUMA
+ * socket of the core that triggers the pool growth.
+ *
+ * Memory entry is allocated from memory trunk, no alignment.
+ *
+ * @param pool
+ *   Pointer to indexed memory entry pool.
+ * @param[out] idx
+ *   Pointer to memory to save allocated index.
+ *   The index is always non-zero; it is not written on failure.
+ * @return
+ *   - Pointer to the allocated memory entry.
+ *   - NULL on error, e.g. not enough memory to grow the pool.
+ */
+void *mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx);
+
+/**
+ * This function allocates a zero initialized memory entry from the pool.
+ * Entries are carved out of trunks; a trunk is allocated on the NUMA
+ * socket of the core that triggers the pool growth.
+ *
+ * Memory entry is allocated from memory trunk, no alignment.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param[out] idx
+ *   Pointer to memory to save allocated index.
+ *   The index is always non-zero; it is not written on failure.
+ * @return
+ *   - Pointer to the allocated memory entry.
+ *   - NULL on error, e.g. not enough memory to grow the pool.
+ */
+void *mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx);
+
+/**
+ * This function frees an indexed memory entry back to the pool.
+ * Caller has to make sure that the index is allocated from same pool.
+ * An index of 0, an index addressing a non-existent trunk, or an index
+ * whose entry is already free is silently ignored.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Allocated memory entry index.
+ */
+void mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function returns the pointer of an indexed memory entry from its
+ * index. Caller has to make sure that the index is valid, and allocated
+ * from same pool.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ * @param idx
+ *   Allocated memory index.
+ * @return
+ *   - Pointer to indexed memory entry.
+ *   - NULL if the index is 0, addresses a non-existent trunk, or refers
+ *     to an entry that is not currently allocated.
+ */
+void *mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx);
+
+/**
+ * This function creates an indexed memory pool.
+ * Caller has to fill in the configuration accordingly.
+ *
+ * @param cfg
+ *   Pointer to the pool configuration. Its content is copied into the
+ *   created pool.
+ * @return
+ *   - Pointer to the created indexed memory pool.
+ *   - NULL if the configuration is invalid or memory allocation fails.
+ */
+struct mlx5_indexed_pool *
+mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg);
+
+/**
+ * This function releases all resources of the pool, including the pool
+ * structure itself.
+ * Caller has to make sure that all indexes and memories allocated
+ * from this pool are not referenced anymore.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool. Must not be NULL.
+ * @return
+ *   - non-zero value on error.
+ *   - 0 on success.
+ */
+int mlx5_ipool_destroy(struct mlx5_indexed_pool *pool);
+
+/**
+ * This function dumps debug info of the pool to standard output.
+ *
+ * @param pool
+ *   Pointer to indexed memory pool.
+ */
+void mlx5_ipool_dump(struct mlx5_indexed_pool *pool);
+
+/*
+ * Macros for a doubly linked list based on indexed memory.
+ * Elements are linked by pool entry index instead of by pointer; index 0
+ * terminates the list.
+ * Example data structure:
+ * struct Foo {
+ *     ILIST_ENTRY(uint16_t) next;
+ *     ...
+ * }
+ *
+ */
+#define ILIST_ENTRY(type)                                              \
+struct {                                                               \
+       type prev; /* Index of previous element. */                     \
+       type next; /* Index of next element. */                         \
+}
+
+/*
+ * Insert elem, whose pool index is idx, at the front of the list.
+ * head is a pointer to the index of the first element. The previous first
+ * element, if any, is looked up in pool to update its prev link.
+ * Arguments are evaluated more than once.
+ */
+#define ILIST_INSERT(pool, head, idx, elem, field)                     \
+       do {                                                            \
+               typeof(elem) peer;                                      \
+               MLX5_ASSERT((elem) && (idx));                           \
+               (elem)->field.next = *(head);                           \
+               (elem)->field.prev = 0;                                 \
+               if (*(head)) {                                          \
+                       (peer) = mlx5_ipool_get(pool, *(head));         \
+                       if (peer)                                       \
+                               (peer)->field.prev = (idx);             \
+               }                                                       \
+               *(head) = (idx);                                        \
+       } while (0)
+
+/*
+ * Unlink elem, whose pool index is idx, from the list.
+ * head is a pointer to the index of the first element. Both neighbours are
+ * looked up in pool to patch their links, and *head moves to the next
+ * element when elem was the first one. The links stored in elem itself are
+ * left unchanged. Arguments are evaluated more than once.
+ */
+#define ILIST_REMOVE(pool, head, idx, elem, field)                     \
+       do {                                                            \
+               typeof(elem) peer;                                      \
+               MLX5_ASSERT(elem);                                      \
+               MLX5_ASSERT(head);                                      \
+               if ((elem)->field.prev) {                               \
+                       (peer) = mlx5_ipool_get                         \
+                                (pool, (elem)->field.prev);            \
+                       if (peer)                                       \
+                               (peer)->field.next = (elem)->field.next;\
+               }                                                       \
+               if ((elem)->field.next) {                               \
+                       (peer) = mlx5_ipool_get                         \
+                                (pool, (elem)->field.next);            \
+                       if (peer)                                       \
+                               (peer)->field.prev = (elem)->field.prev;\
+               }                                                       \
+               if (*(head) == (idx))                                   \
+                       *(head) = (elem)->field.next;                   \
+       } while (0)
+
+/*
+ * Iterate over the list starting at index head (an index value, not a
+ * pointer to it). On each pass idx holds the index of the current element
+ * and elem its address as returned by mlx5_ipool_get(). The walk stops at
+ * index 0 or when the lookup returns NULL.
+ * Every macro argument is parenthesized at each use.
+ */
+#define ILIST_FOREACH(pool, head, idx, elem, field)                    \
+       for ((idx) = (head), (elem) =                                   \
+            (idx) ? mlx5_ipool_get((pool), (idx)) : NULL; (elem);      \
+            (idx) = (elem)->field.next, (elem) =                       \
+            (idx) ? mlx5_ipool_get((pool), (idx)) : NULL)
+
+/*
+ * Single index list: elements hold only the index of the next element.
+ * Index 0 terminates the list.
+ */
+#define SILIST_ENTRY(type)                                             \
+struct {                                                               \
+       type next; /* Index of next element. */                         \
+}
+
+/*
+ * Insert elem, whose pool index is idx, at the front of the single index
+ * list. head is a pointer to the index of the first element.
+ */
+#define SILIST_INSERT(head, idx, elem, field)                          \
+       do {                                                            \
+               MLX5_ASSERT((elem) && (idx));                           \
+               (elem)->field.next = *(head);                           \
+               *(head) = (idx);                                        \
+       } while (0)
+
+/*
+ * Iterate over the single index list starting at index head (an index
+ * value, not a pointer to it). On each pass idx holds the index of the
+ * current element and elem its address as returned by mlx5_ipool_get().
+ * The walk stops at index 0 or when the lookup returns NULL.
+ * Every macro argument is parenthesized at each use.
+ */
+#define SILIST_FOREACH(pool, head, idx, elem, field)                   \
+       for ((idx) = (head), (elem) =                                   \
+            (idx) ? mlx5_ipool_get((pool), (idx)) : NULL; (elem);      \
+            (idx) = (elem)->field.next, (elem) =                       \
+            (idx) ? mlx5_ipool_get((pool), (idx)) : NULL)
+
 #endif /* RTE_PMD_MLX5_UTILS_H_ */