From 0a4864689329639e0bd1291ed7a95a8086bdd18e Mon Sep 17 00:00:00 2001
From: Andrew Rybchenko
Date: Mon, 16 Apr 2018 14:24:33 +0100
Subject: [PATCH] mempool: add op to calculate memory size to be allocated

The size of the memory chunk required to populate mempool objects depends
on how the objects are stored in memory. Different mempool drivers may
have different requirements, so add a new operation that calculates the
memory size in accordance with driver requirements and advertises the
minimum memory chunk size and alignment in a generic way.

Bump the ABI version since the patch breaks it.

Suggested-by: Olivier Matz
Signed-off-by: Andrew Rybchenko
Acked-by: Olivier Matz
Acked-by: Anatoly Burakov
---
 doc/guides/rel_notes/deprecation.rst          |   3 +-
 doc/guides/rel_notes/release_18_05.rst        |   7 +-
 lib/librte_mempool/Makefile                   |   3 +-
 lib/librte_mempool/meson.build                |   5 +-
 lib/librte_mempool/rte_mempool.c              | 114 +++++++++++--------
 lib/librte_mempool/rte_mempool.h              |  86 +++++++++++++-
 lib/librte_mempool/rte_mempool_ops.c          |  18 +++
 lib/librte_mempool/rte_mempool_ops_default.c  |  38 +++++++
 lib/librte_mempool/rte_mempool_version.map    |   7 ++
 9 files changed, 224 insertions(+), 57 deletions(-)
 create mode 100644 lib/librte_mempool/rte_mempool_ops_default.c

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 1f814b4b1e..982112418f 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -60,8 +60,7 @@ Deprecation Notices

   - removal of ``get_capabilities`` mempool ops and related flags.
   - substitute ``register_memory_area`` with ``populate`` ops.
-  - addition of new ops to customize required memory chunk calculation,
-    customize objects population and allocate contiguous
+  - addition of new ops to customize object population and allocate contiguous
     block of objects if underlying driver supports it.

 * mbuf: The opaque ``mbuf->hash.sched`` field will be updated to support generic
diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
index 17ff4521a4..16417060c3 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -226,6 +226,11 @@ ABI Changes
   structure and the offset of the fields remains the same on platforms with
   64B cache line, but change on other platforms.

+* mempool: ops have changed.
+
+  A new callback ``calc_mem_size`` has been added to ``rte_mempool_ops``
+  to allow customization of the required memory size calculation.
+
 * **Additional fields in rte_eth_dev_info.**

   The ``rte_eth_dev_info`` structure has had two extra entries appended to the
@@ -314,7 +319,7 @@ The libraries prepended with a plus sign were incremented in this version.
     librte_latencystats.so.1
     librte_lpm.so.2
   + librte_mbuf.so.4
-    librte_mempool.so.3
+  + librte_mempool.so.4
   + librte_meter.so.2
     librte_metrics.so.1
     librte_net.so.1
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 1f85d34002..421e2a7cf0 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -11,7 +11,7 @@ LDLIBS += -lrte_eal -lrte_ring

 EXPORT_MAP := rte_mempool_version.map

-LIBABIVER := 3
+LIBABIVER := 4

 # memseg walk is not yet part of stable API
 CFLAGS += -DALLOW_EXPERIMENTAL_API
@@ -19,6 +19,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool_ops_default.c

 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index 89506c5268..6181ad8abd 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation

-version = 3
-sources = files('rte_mempool.c', 'rte_mempool_ops.c')
+version = 4
+sources = files('rte_mempool.c', 'rte_mempool_ops.c',
+		'rte_mempool_ops_default.c')
 headers = files('rte_mempool.h')

 deps += ['ring']
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index b15b79bbb5..fdcee0569b 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -574,12 +574,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
-	size_t size, total_elt_sz, align, pg_sz, pg_shift;
+	ssize_t mem_size;
+	size_t align, pg_sz, pg_shift;
 	rte_iova_t iova;
 	unsigned mz_id, n;
-	unsigned int mp_flags;
 	int ret;
-	bool force_contig, no_contig, try_contig, no_pageshift;
+	bool no_contig, try_contig, no_pageshift;

 	ret = mempool_ops_alloc_once(mp);
 	if (ret != 0)
@@ -589,22 +589,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	if (mp->nb_mem_chunks != 0)
 		return -EEXIST;

-	/* Get mempool capabilities */
-	mp_flags = 0;
-	ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
-	if ((ret < 0) && (ret != -ENOTSUP))
-		return ret;
-
-	/* update mempool capabilities */
-	mp->flags |= mp_flags;
-
 	no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG;
-	force_contig = mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG;

 	/*
 	 * the following section calculates page shift and page size values.
 	 *
-	 * these values impact the result of rte_mempool_xmem_size(), which
+	 * these values impact the result of the calc_mem_size operation, which
 	 * returns the amount of memory that should be allocated to store the
 	 * desired number of objects. when not zero, it allocates more memory
 	 * for the padding between objects, to ensure that an object does not
@@ -625,7 +615,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	 *
 	 * if our IO addresses are virtual, not actual physical (IOVA as VA
 	 * case), then no page shift needed - our memory allocation will give us
-	 * contiguous physical memory as far as the hardware is concerned, so
+	 * contiguous IO memory as far as the hardware is concerned, so
 	 * act as if we're getting contiguous memory.
 	 *
 	 * if our IO addresses are physical, we may get memory from bigger
@@ -643,39 +633,35 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 	 * 1G page on a 10MB memzone). If we fail to get enough contiguous
 	 * memory, then we'll go and reserve space page-by-page.
 	 */
-	no_pageshift = no_contig || force_contig ||
-			rte_eal_iova_mode() == RTE_IOVA_VA;
+	no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA;
 	try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages();
-	if (force_contig)
-		mz_flags |= RTE_MEMZONE_IOVA_CONTIG;

 	if (no_pageshift) {
 		pg_sz = 0;
 		pg_shift = 0;
-		align = RTE_CACHE_LINE_SIZE;
 	} else if (try_contig) {
 		pg_sz = get_min_page_size();
 		pg_shift = rte_bsf32(pg_sz);
-		/* we're trying to reserve contiguous memzone first, so try
-		 * align to cache line; if we fail to reserve a contiguous
-		 * memzone, we'll adjust alignment to equal pagesize later.
-		 */
-		align = RTE_CACHE_LINE_SIZE;
 	} else {
 		pg_sz = getpagesize();
 		pg_shift = rte_bsf32(pg_sz);
-		align = pg_sz;
 	}

-	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
 	for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
+		size_t min_chunk_size;
 		unsigned int flags;
+
 		if (try_contig || no_pageshift)
-			size = rte_mempool_xmem_size(n, total_elt_sz, 0,
-				mp->flags);
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					0, &min_chunk_size, &align);
 		else
-			size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
-				mp->flags);
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					pg_shift, &min_chunk_size, &align);
+
+		if (mem_size < 0) {
+			ret = mem_size;
+			goto fail;
+		}

 		ret = snprintf(mz_name, sizeof(mz_name),
 			RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -692,27 +678,31 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 		if (try_contig)
 			flags |= RTE_MEMZONE_IOVA_CONTIG;

-		mz = rte_memzone_reserve_aligned(mz_name, size, mp->socket_id,
-				flags, align);
+		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
+				mp->socket_id, flags, align);

-		/* if we were trying to allocate contiguous memory, adjust
-		 * memzone size and page size to fit smaller page sizes, and
-		 * try again.
+		/* if we were trying to allocate contiguous memory, failed and
+		 * minimum required contiguous chunk fits minimum page, adjust
+		 * memzone size to the page size, and try again.
 		 */
-		if (mz == NULL && try_contig) {
+		if (mz == NULL && try_contig && min_chunk_size <= pg_sz) {
 			try_contig = false;
 			flags &= ~RTE_MEMZONE_IOVA_CONTIG;
-			align = pg_sz;
-			size = rte_mempool_xmem_size(n, total_elt_sz,
-				pg_shift, mp->flags);

-			mz = rte_memzone_reserve_aligned(mz_name, size,
+			mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+					pg_shift, &min_chunk_size, &align);
+			if (mem_size < 0) {
+				ret = mem_size;
+				goto fail;
+			}
+
+			mz = rte_memzone_reserve_aligned(mz_name, mem_size,
 				mp->socket_id, flags, align);
 		}
 		/* don't try reserving with 0 size if we were asked to reserve
 		 * IOVA-contiguous memory.
 		 */
-		if (!force_contig && mz == NULL) {
+		if (min_chunk_size < (size_t)mem_size && mz == NULL) {
 			/* not enough memory, retry with the biggest zone we
 			 * have
 			 */
@@ -724,6 +714,12 @@ rte_mempool_populate_default(struct rte_mempool *mp)
 			goto fail;
 		}

+		if (mz->len < min_chunk_size) {
+			rte_memzone_free(mz);
+			ret = -ENOMEM;
+			goto fail;
+		}
+
 		if (no_contig)
 			iova = RTE_BAD_IOVA;
 		else
@@ -753,16 +749,18 @@
 }

 /* return the memory size required for mempool objects in anonymous mem */
-static size_t
+static ssize_t
 get_anon_size(const struct rte_mempool *mp)
 {
-	size_t size, total_elt_sz, pg_sz, pg_shift;
+	ssize_t size;
+	size_t pg_sz, pg_shift;
+	size_t min_chunk_size;
+	size_t align;

 	pg_sz = getpagesize();
 	pg_shift = rte_bsf32(pg_sz);
-	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-	size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
-					mp->flags);
+	size = rte_mempool_ops_calc_mem_size(mp, mp->size, pg_shift,
+					     &min_chunk_size, &align);

 	return size;
 }
@@ -772,14 +770,25 @@ static void
 rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr,
 			       void *opaque)
 {
-	munmap(opaque, get_anon_size(memhdr->mp));
+	ssize_t size;
+
+	/*
+	 * Calculate size since memhdr->len has contiguous chunk length
+	 * which may be smaller if anon map is split into many contiguous
+	 * chunks. Result must be the same as we calculated on populate.
+	 */
+	size = get_anon_size(memhdr->mp);
+	if (size < 0)
+		return;
+
+	munmap(opaque, size);
 }

 /* populate the mempool with an anonymous mapping */
 int
 rte_mempool_populate_anon(struct rte_mempool *mp)
 {
-	size_t size;
+	ssize_t size;
 	int ret;
 	char *addr;

@@ -793,8 +802,13 @@ rte_mempool_populate_anon(struct rte_mempool *mp)
 	if (ret != 0)
 		return ret;

-	/* get chunk of virtually continuous memory */
 	size = get_anon_size(mp);
+	if (size < 0) {
+		rte_errno = -size;
+		return 0;
+	}
+
+	/* get chunk of virtually continuous memory */
 	addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (addr == MAP_FAILED) {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index e531a15398..191255d1c9 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -400,6 +400,62 @@ typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
 typedef int (*rte_mempool_ops_register_memory_area_t)
 (const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);

+/**
+ * Calculate memory size required to store given number of objects.
+ *
+ * If mempool objects are not required to be IOVA-contiguous
+ * (the flag MEMPOOL_F_NO_IOVA_CONTIG is set), min_chunk_size defines
+ * virtually contiguous chunk size. Otherwise, if mempool objects must
+ * be IOVA-contiguous (the flag MEMPOOL_F_NO_IOVA_CONTIG is clear),
+ * min_chunk_size defines IOVA-contiguous chunk size.
+ *
+ * @param[in] mp
+ *   Pointer to the memory pool.
+ * @param[in] obj_num
+ *   Number of objects.
+ * @param[in] pg_shift
+ *   LOG2 of the physical page size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ *   Location for minimum size of the memory chunk which may be used to
+ *   store memory pool objects.
+ * @param[out] align
+ *   Location for required memory chunk alignment.
+ * @return
+ *   Required memory size aligned at page boundary.
+ */
+typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp,
+		uint32_t obj_num, uint32_t pg_shift,
+		size_t *min_chunk_size, size_t *align);
+
+/**
+ * Default way to calculate memory size required to store given number of
+ * objects.
+ *
+ * If page boundaries may be ignored, it is just the product of the total
+ * object size (including header and trailer) and the number of objects.
+ * Otherwise, it is the number of pages required to store the given number
+ * of objects without crossing a page boundary.
+ *
+ * Note that if object size is bigger than page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * If mempool driver requires object addresses to be block size aligned
+ * (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS), space for one extra element is
+ * reserved to be able to meet the requirement.
+ *
+ * Minimum size of the memory chunk is either all required space, if
+ * capabilities say that the whole memory area must be physically contiguous
+ * (MEMPOOL_F_CAPA_PHYS_CONTIG), or the maximum of the page size and total
+ * element size.
+ *
+ * Required memory chunk alignment is the maximum of page size and cache
+ * line size.
+ */
+ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+		uint32_t obj_num, uint32_t pg_shift,
+		size_t *min_chunk_size, size_t *align);
+
 /** Structure defining mempool operations structure */
 struct rte_mempool_ops {
 	char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -416,6 +472,11 @@ struct rte_mempool_ops {
 	 * Notify new memory area to mempool
 	 */
 	rte_mempool_ops_register_memory_area_t register_memory_area;
+	/**
+	 * Optional callback to calculate memory size required to
+	 * store specified number of objects.
+	 */
+	rte_mempool_calc_mem_size_t calc_mem_size;
 } __rte_cache_aligned;

 #define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -564,6 +625,29 @@
 int rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
 		char *vaddr, rte_iova_t iova, size_t len);

+/**
+ * @internal wrapper for mempool_ops calc_mem_size callback.
+ * API to calculate the size of memory required to store the specified
+ * number of objects.
+ *
+ * @param[in] mp
+ *   Pointer to the memory pool.
+ * @param[in] obj_num
+ *   Number of objects.
+ * @param[in] pg_shift
+ *   LOG2 of the physical page size. If set to 0, ignore page boundaries.
+ * @param[out] min_chunk_size
+ *   Location for minimum size of the memory chunk which may be used to
+ *   store memory pool objects.
+ * @param[out] align
+ *   Location for required memory chunk alignment.
+ * @return
+ *   Required memory size aligned at page boundary.
+ */
+ssize_t rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+		uint32_t obj_num, uint32_t pg_shift,
+		size_t *min_chunk_size, size_t *align);
+
 /**
  * @internal wrapper for mempool_ops free callback.
  *
@@ -1534,7 +1618,7 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
 * of objects. Assume that the memory buffer will be aligned at page
 * boundary.
 *
- * Note that if object size is bigger then page size, then it assumes
+ * Note that if object size is bigger than page size, then it assumes
 * that pages are grouped in subsets of physically continuous pages big
 * enough to store at least one object.
 *
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 0732255c02..26908cc316 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -59,6 +59,7 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
 	ops->get_count = h->get_count;
 	ops->get_capabilities = h->get_capabilities;
 	ops->register_memory_area = h->register_memory_area;
+	ops->calc_mem_size = h->calc_mem_size;

 	rte_spinlock_unlock(&rte_mempool_ops_table.sl);

@@ -123,6 +124,23 @@ rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
 	return ops->register_memory_area(mp, vaddr, iova, len);
 }

+/* wrapper to calculate required memory size for given number of objects */
+ssize_t
+rte_mempool_ops_calc_mem_size(const struct rte_mempool *mp,
+				uint32_t obj_num, uint32_t pg_shift,
+				size_t *min_chunk_size, size_t *align)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+
+	if (ops->calc_mem_size == NULL)
+		return rte_mempool_op_calc_mem_size_default(mp, obj_num,
+				pg_shift, min_chunk_size, align);
+
+	return ops->calc_mem_size(mp, obj_num, pg_shift, min_chunk_size, align);
+}
+
 /* sets mempool ops previously registered by rte_mempool_register_ops. */
 int
 rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
new file mode 100644
index 0000000000..57fe79b062
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 6WIND S.A.
+ * Copyright(c) 2018 Solarflare Communications Inc.
+ */
+
+#include <rte_mempool.h>
+
+ssize_t
+rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
+				     uint32_t obj_num, uint32_t pg_shift,
+				     size_t *min_chunk_size, size_t *align)
+{
+	unsigned int mp_flags;
+	int ret;
+	size_t total_elt_sz;
+	size_t mem_size;
+
+	/* Get mempool capabilities */
+	mp_flags = 0;
+	ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+	if ((ret < 0) && (ret != -ENOTSUP))
+		return ret;
+
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+	mem_size = rte_mempool_xmem_size(obj_num, total_elt_sz, pg_shift,
+					 mp->flags | mp_flags);
+
+	if (mp_flags & MEMPOOL_F_CAPA_PHYS_CONTIG)
+		*min_chunk_size = mem_size;
+	else
+		*min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
+
+	*align = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, (size_t)1 << pg_shift);
+
+	return mem_size;
+}
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 62b76f9129..cb381898da 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -51,3 +51,10 @@ DPDK_17.11 {
 	rte_mempool_populate_iova_tab;

 } DPDK_16.07;
+
+DPDK_18.05 {
+	global:
+
+	rte_mempool_op_calc_mem_size_default;
+
+} DPDK_17.11;
-- 
2.20.1
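
For reference, a minimal, hypothetical sketch (not part of the patch) of how a mempool driver could plug a custom calc_mem_size callback into its ops once this op exists. The "pool_sketch" names are invented for illustration, and the driver's mandatory alloc/free/enqueue/dequeue/get_count callbacks are omitted for brevity; a real driver must provide them.

#include <rte_mempool.h>

/*
 * Driver-specific size calculation: reuse the default computation and
 * only tighten the reported constraints.  The assumed hardware needs all
 * objects of the pool to sit in one IOVA-contiguous chunk, so
 * min_chunk_size is raised to the whole required memory size.
 */
static ssize_t
sketch_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		     uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
	ssize_t mem_size;

	mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num, pg_shift,
							min_chunk_size, align);
	if (mem_size < 0)
		return mem_size;

	*min_chunk_size = (size_t)mem_size;
	return mem_size;
}

static struct rte_mempool_ops sketch_mempool_ops = {
	.name = "pool_sketch",
	/* alloc/free/enqueue/dequeue/get_count omitted in this sketch */
	.calc_mem_size = sketch_calc_mem_size,
};

MEMPOOL_REGISTER_OPS(sketch_mempool_ops);

With such ops selected via rte_mempool_set_ops_byname(), rte_mempool_populate_default() calls rte_mempool_ops_calc_mem_size() to size each memzone, and the wrapper falls back to rte_mempool_op_calc_mem_size_default() whenever the callback is left NULL.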