From 213af31e0960e5086bce280364b2eb0d9c6cd261 Mon Sep 17 00:00:00 2001 From: Keith Wiles Date: Thu, 14 Apr 2016 11:42:36 +0200 Subject: [PATCH] mempool: reduce structure size if no cache needed The rte_mempool structure is changed, which will cause an ABI change for this structure. Providing backward compat is not reasonable here as this structure is used in multiple defines/inlines. Allow mempool cache support to be dynamic depending on if the mempool being created needs cache support. Saves about 1.5M of memory used by the rte_mempool structure. Allocating small mempools which do not require cache can consume large amounts of memory if you have a number of these mempools. Signed-off-by: Keith Wiles Acked-by: Olivier Matz --- app/test/test_mempool.c | 4 +- doc/guides/rel_notes/deprecation.rst | 9 ----- doc/guides/rel_notes/release_16_07.rst | 6 ++- lib/librte_mempool/Makefile | 2 +- lib/librte_mempool/rte_mempool.c | 55 ++++++++++++-------------- lib/librte_mempool/rte_mempool.h | 31 +++++++-------- 6 files changed, 48 insertions(+), 59 deletions(-) diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c index f0f823b9eb..10e1fa467a 100644 --- a/app/test/test_mempool.c +++ b/app/test/test_mempool.c @@ -122,8 +122,8 @@ test_mempool_basic(void) return -1; printf("get private data\n"); - if (rte_mempool_get_priv(mp) != - (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num)) + if (rte_mempool_get_priv(mp) != (char *)mp + + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size)) return -1; printf("get physical address of an object\n"); diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst index fffe9c740b..7d94ba5ac4 100644 --- a/doc/guides/rel_notes/deprecation.rst +++ b/doc/guides/rel_notes/deprecation.rst @@ -35,15 +35,6 @@ Deprecation Notices RTE_ETH_FLOW_MAX. The release 2.2 does not contain these ABI changes, but release 2.3 will. 
[postponed] -* ABI change is planned for the rte_mempool structure to allow mempool - cache support to be dynamic depending on the mempool being created - needing cache support. Saves about 1.5M of memory per rte_mempool structure - by removing the per lcore cache memory. Change will occur in DPDK 16.07 - release and will skip the define RTE_NEXT_ABI in DPDK 16.04 release. The - code affected is app/test/test_mempool.c and librte_mempool/rte_mempool.[ch]. - The rte_mempool.local_cache will be converted from an array to a pointer to - allow for dynamic allocation of the per lcore cache memory. - * ABI will change for rte_mempool struct to move the cache-related fields to the more appropriate rte_mempool_cache struct. The mempool API is also changed to enable external cache management that is not tied to EAL diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index 71d35408b5..58c8ef9f87 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -34,6 +34,10 @@ New Features Refer to the previous release notes for examples. +* **Removed mempool cache if not needed.** + + The size of the mempool structure is reduced if the per-lcore cache is disabled. + Resolved Issues --------------- @@ -132,7 +136,7 @@ The libraries prepended with a plus sign were incremented in this version. 
librte_kvargs.so.1 librte_lpm.so.2 librte_mbuf.so.2 - librte_mempool.so.1 + + librte_mempool.so.2 librte_meter.so.1 librte_pipeline.so.3 librte_pmd_bond.so.1 diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile index a6898eff8d..706f844e44 100644 --- a/lib/librte_mempool/Makefile +++ b/lib/librte_mempool/Makefile @@ -38,7 +38,7 @@ CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 EXPORT_MAP := rte_mempool_version.map -LIBABIVER := 1 +LIBABIVER := 2 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 70812d9554..0724942390 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, /* compilation-time checks */ RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) & RTE_CACHE_LINE_MASK) != 0); -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & RTE_CACHE_LINE_MASK) != 0); - RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) & - RTE_CACHE_LINE_MASK) != 0); -#endif #ifdef RTE_LIBRTE_MEMPOOL_DEBUG RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) & RTE_CACHE_LINE_MASK) != 0); @@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, */ int head = sizeof(struct rte_mempool); int new_size = (private_data_size + head) % page_size; - if (new_size) { + if (new_size) private_data_size += page_size - new_size; - } } /* try to allocate tailq entry */ @@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, * store mempool objects. Otherwise reserve a memzone that is large * enough to hold mempool header and metadata plus mempool objects. 
*/ - mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; + mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size); + mempool_size += private_data_size; mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN); if (vaddr == NULL) mempool_size += (size_t)objsz.total_size * n; @@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size); mp->private_data_size = private_data_size; + /* + * local_cache pointer is set even if cache_size is zero. + * The local_cache points to just past the elt_pa[] array. + */ + mp->local_cache = (struct rte_mempool_cache *) + RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0)); + /* calculate address of the first element for continuous mempool. */ - obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) + + obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) + private_data_size; obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN); @@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mp->elt_va_start = (uintptr_t)obj; mp->elt_pa[0] = mp->phys_addr + (mp->elt_va_start - (uintptr_t)mp); - - /* mempool elements in a separate chunk of memory. */ } else { + /* mempool elements in a separate chunk of memory. 
*/ mp->elt_va_start = (uintptr_t)vaddr; memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num); } @@ -643,19 +645,15 @@ unsigned rte_mempool_count(const struct rte_mempool *mp) { unsigned count; + unsigned lcore_id; count = rte_ring_count(mp->ring); -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 - { - unsigned lcore_id; - if (mp->cache_size == 0) - return count; + if (mp->cache_size == 0) + return count; - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) - count += mp->local_cache[lcore_id].len; - } -#endif + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + count += mp->local_cache[lcore_id].len; /* * due to race condition (access to len is not locked), the @@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp) static unsigned rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) { -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 unsigned lcore_id; unsigned count = 0; unsigned cache_count; fprintf(f, " cache infos:\n"); fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size); + + if (mp->cache_size == 0) + return count; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { cache_count = mp->local_cache[lcore_id].len; fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count); @@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) } fprintf(f, " total_cache_count=%u\n", count); return count; -#else - RTE_SET_USED(mp); - fprintf(f, " cache disabled\n"); - return 0; -#endif } #ifdef RTE_LIBRTE_MEMPOOL_DEBUG @@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp) #define mempool_audit_cookies(mp) do {} while(0) #endif -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /* check cookies before and after objects */ static void mempool_audit_cache(const struct rte_mempool *mp) { /* check cache size consistency */ unsigned lcore_id; + + if (mp->cache_size == 0) + return; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) { RTE_LOG(CRIT, MEMPOOL, 
"badness on cache[%u]\n", @@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp) } } } -#else -#define mempool_audit_cache(mp) do {} while(0) -#endif - /* check the consistency of mempool (size, cookies, ...) */ void diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 9745bf0d70..0f3ef4a03f 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -95,7 +95,6 @@ struct rte_mempool_debug_stats { } __rte_cache_aligned; #endif -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /** * A structure that stores a per-core object cache. */ @@ -107,7 +106,6 @@ struct rte_mempool_cache { */ void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */ } __rte_cache_aligned; -#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /** * A structure that stores the size of mempool elements. @@ -194,10 +192,7 @@ struct rte_mempool { unsigned private_data_size; /**< Size of private data. */ -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 - /** Per-lcore local cache. */ - struct rte_mempool_cache local_cache[RTE_MAX_LCORE]; -#endif + struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG /** Per-lcore statistics. */ @@ -246,6 +241,13 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) #endif +/** + * Size of elt_pa array size based on number of pages. (Internal use) + */ +#define __PA_SIZE(mp, pgn) \ + RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \ + sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE) + /** * Calculate the size of the mempool header. * @@ -253,10 +255,12 @@ struct rte_mempool { * Pointer to the memory pool. * @param pgn * Number of pages used to store mempool objects. + * @param cs + * Size of the per-lcore cache. 
*/ -#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \ - RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \ - sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE)) +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \ + (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \ + (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE))) /** * Return true if the whole mempool is in contiguous memory. @@ -755,19 +759,16 @@ static inline void __attribute__((always_inline)) __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, unsigned n, int is_mp) { -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 struct rte_mempool_cache *cache; uint32_t index; void **cache_objs; unsigned lcore_id = rte_lcore_id(); uint32_t cache_size = mp->cache_size; uint32_t flushthresh = mp->cache_flushthresh; -#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* increment stat now, adding in mempool always success */ __MEMPOOL_STAT_ADD(mp, put, n); -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /* cache is not enabled or single producer or non-EAL thread */ if (unlikely(cache_size == 0 || is_mp == 0 || lcore_id >= RTE_MAX_LCORE)) @@ -802,7 +803,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, return; ring_enqueue: -#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* push remaining objects in ring */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG @@ -946,7 +946,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n, int is_mc) { int ret; -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 struct rte_mempool_cache *cache; uint32_t index, len; void **cache_objs; @@ -992,7 +991,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table, return 0; ring_dequeue: -#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* get remaining objects from ring */ if (is_mc) @@ -1293,7 +1291,8 @@ void rte_mempool_audit(const struct rte_mempool *mp); */ static inline void *rte_mempool_get_priv(struct rte_mempool *mp) { - return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num); + return (char *)mp + + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, 
mp->cache_size); } /** -- 2.20.1