From: Olivier Matz Date: Tue, 5 Nov 2019 15:37:05 +0000 (+0100) Subject: mempool: prevent objects from being across pages X-Git-Url: http://git.droids-corp.org/?p=dpdk.git;a=commitdiff_plain;h=84626a0d61a624dad11614946accc8eebd52353b mempool: prevent objects from being across pages When populating a mempool, ensure that objects are not located across several pages, unless the user did not request IOVA-contiguous objects. Signed-off-by: Vamsi Attunuru Signed-off-by: Olivier Matz Acked-by: Nipun Gupta Acked-by: Andrew Rybchenko --- diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst index a009bc0689..810548f1de 100644 --- a/doc/guides/rel_notes/release_19_11.rst +++ b/doc/guides/rel_notes/release_19_11.rst @@ -65,6 +65,11 @@ New Features The lock-free stack implementation is enabled for aarch64 platforms. +* **Changed mempool allocation behaviour.** + + Objects no longer cross page boundaries by default. + This may consume more memory when using small memory pages. 
+ * **Added support of dynamic fields and flags in mbuf.** This new feature adds the ability to dynamically register some room diff --git a/drivers/mempool/bucket/rte_mempool_bucket.c b/drivers/mempool/bucket/rte_mempool_bucket.c index dfeaf4e454..5ce1ef16fb 100644 --- a/drivers/mempool/bucket/rte_mempool_bucket.c +++ b/drivers/mempool/bucket/rte_mempool_bucket.c @@ -401,6 +401,11 @@ bucket_alloc(struct rte_mempool *mp) struct bucket_data *bd; unsigned int i; unsigned int bucket_header_size; + size_t pg_sz; + + rc = rte_mempool_get_page_size(mp, &pg_sz); + if (rc < 0) + return rc; bd = rte_zmalloc_socket("bucket_pool", sizeof(*bd), RTE_CACHE_LINE_SIZE, mp->socket_id); @@ -416,7 +421,8 @@ bucket_alloc(struct rte_mempool *mp) RTE_BUILD_BUG_ON(sizeof(struct bucket_header) > RTE_CACHE_LINE_SIZE); bd->header_size = mp->header_size + bucket_header_size; bd->total_elt_size = mp->header_size + mp->elt_size + mp->trailer_size; - bd->bucket_mem_size = RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB * 1024; + bd->bucket_mem_size = RTE_MIN(pg_sz, + (size_t)(RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB * 1024)); bd->obj_per_bucket = (bd->bucket_mem_size - bucket_header_size) / bd->total_elt_size; bd->bucket_page_mask = ~(rte_align64pow2(bd->bucket_mem_size) - 1); @@ -585,7 +591,7 @@ bucket_populate(struct rte_mempool *mp, unsigned int max_objs, hdr->fill_cnt = 0; hdr->lcore_id = LCORE_ID_ANY; - rc = rte_mempool_op_populate_helper(mp, + rc = rte_mempool_op_populate_helper(mp, 0, RTE_MIN(bd->obj_per_bucket, max_objs - n_objs), iter + bucket_header_sz, diff --git a/drivers/mempool/dpaa/dpaa_mempool.c b/drivers/mempool/dpaa/dpaa_mempool.c index 27736e6c22..3a2528331c 100644 --- a/drivers/mempool/dpaa/dpaa_mempool.c +++ b/drivers/mempool/dpaa/dpaa_mempool.c @@ -341,8 +341,8 @@ dpaa_populate(struct rte_mempool *mp, unsigned int max_objs, */ TAILQ_INSERT_HEAD(&rte_dpaa_memsegs, ms, next); - return rte_mempool_op_populate_helper(mp, max_objs, vaddr, paddr, len, - obj_cb, obj_cb_arg); + return 
rte_mempool_op_populate_helper(mp, 0, max_objs, vaddr, paddr, + len, obj_cb, obj_cb_arg); } static const struct rte_mempool_ops dpaa_mpool_ops = { diff --git a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c index 2ecbb10e81..b5084c1cb4 100644 --- a/drivers/mempool/dpaa2/dpaa2_hw_mempool.c +++ b/drivers/mempool/dpaa2/dpaa2_hw_mempool.c @@ -432,8 +432,8 @@ dpaa2_populate(struct rte_mempool *mp, unsigned int max_objs, /* Insert entry into the PA->VA Table */ dpaax_iova_table_update(paddr, vaddr, len); - return rte_mempool_op_populate_helper(mp, max_objs, vaddr, paddr, len, - obj_cb, obj_cb_arg); + return rte_mempool_op_populate_helper(mp, 0, max_objs, vaddr, paddr, + len, obj_cb, obj_cb_arg); } static const struct rte_mempool_ops dpaa2_mpool_ops = { diff --git a/drivers/mempool/octeontx/rte_mempool_octeontx.c b/drivers/mempool/octeontx/rte_mempool_octeontx.c index fff33e5c62..bd00700202 100644 --- a/drivers/mempool/octeontx/rte_mempool_octeontx.c +++ b/drivers/mempool/octeontx/rte_mempool_octeontx.c @@ -132,14 +132,15 @@ octeontx_fpavf_calc_mem_size(const struct rte_mempool *mp, size_t *min_chunk_size, size_t *align) { ssize_t mem_size; + size_t total_elt_sz; - /* - * Simply need space for one more object to be able to - * fulfil alignment requirements. + /* Need space for one more obj on each chunk to fulfill + * alignment requirements. 
*/ - mem_size = rte_mempool_op_calc_mem_size_helper(mp, obj_num + 1, - pg_shift, - min_chunk_size, align); + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + mem_size = rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, + total_elt_sz, min_chunk_size, + align); if (mem_size >= 0) { /* * Memory area which contains objects must be physically @@ -168,7 +169,7 @@ octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs, total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; /* align object start address to a multiple of total_elt_sz */ - off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); + off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1); if (len < off) return -EINVAL; @@ -184,8 +185,10 @@ octeontx_fpavf_populate(struct rte_mempool *mp, unsigned int max_objs, if (ret < 0) return ret; - return rte_mempool_op_populate_helper(mp, max_objs, vaddr, iova, len, - obj_cb, obj_cb_arg); + return rte_mempool_op_populate_helper(mp, + RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, + max_objs, vaddr, iova, len, + obj_cb, obj_cb_arg); } static struct rte_mempool_ops octeontx_fpavf_ops = { diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c index 3aea92a01b..ea4b1c45d2 100644 --- a/drivers/mempool/octeontx2/otx2_mempool_ops.c +++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c @@ -713,12 +713,15 @@ static ssize_t otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num, uint32_t pg_shift, size_t *min_chunk_size, size_t *align) { - /* - * Simply need space for one more object to be able to - * fulfill alignment requirements. + size_t total_elt_sz; + + /* Need space for one more obj on each chunk to fulfill + * alignment requirements. 
*/ - return rte_mempool_op_calc_mem_size_helper(mp, obj_num + 1, pg_shift, - min_chunk_size, align); + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, + total_elt_sz, min_chunk_size, + align); } static int @@ -735,7 +738,7 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; /* Align object start address to a multiple of total_elt_sz */ - off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); + off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1); if (len < off) return -EINVAL; @@ -749,8 +752,10 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, if (npa_lf_aura_range_update_check(mp->pool_id) < 0) return -EBUSY; - return rte_mempool_op_populate_helper(mp, max_objs, vaddr, iova, len, - obj_cb, obj_cb_arg); + return rte_mempool_op_populate_helper(mp, + RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, + max_objs, vaddr, iova, len, + obj_cb, obj_cb_arg); } static struct rte_mempool_ops otx2_npa_ops = { diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 758c5410b4..d3db9273db 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -431,8 +431,6 @@ rte_mempool_get_page_size(struct rte_mempool *mp, size_t *pg_sz) if (!need_iova_contig_obj) *pg_sz = 0; - else if (!alloc_in_ext_mem && rte_eal_iova_mode() == RTE_IOVA_VA) - *pg_sz = 0; else if (rte_eal_has_hugepages() || alloc_in_ext_mem) *pg_sz = get_min_page_size(mp->socket_id); else @@ -481,17 +479,15 @@ rte_mempool_populate_default(struct rte_mempool *mp) * then just set page shift and page size to 0, because the user has * indicated that there's no need to care about anything. 
* - * if we do need contiguous objects, there is also an option to reserve - * the entire mempool memory as one contiguous block of memory, in - * which case the page shift and alignment wouldn't matter as well. + * if we do need contiguous objects (if a mempool driver has its + * own calc_size() method returning min_chunk_size = mem_size), + * there is also an option to reserve the entire mempool memory + * as one contiguous block of memory. * * if we require contiguous objects, but not necessarily the entire - * mempool reserved space to be contiguous, then there are two options. - * - * if our IO addresses are virtual, not actual physical (IOVA as VA - * case), then no page shift needed - our memory allocation will give us - * contiguous IO memory as far as the hardware is concerned, so - * act as if we're getting contiguous memory. + * mempool reserved space to be contiguous, pg_sz will be != 0, + * and the default ops->populate() will take care of not placing + * objects across pages. * * if our IO addresses are physical, we may get memory from bigger * pages, or we might get memory from smaller pages, and how much of it @@ -504,11 +500,6 @@ rte_mempool_populate_default(struct rte_mempool *mp) * * If we fail to get enough contiguous memory, then we'll go and * reserve space in smaller chunks. - * - * We also have to take into account the fact that memory that we're - * going to allocate from can belong to an externally allocated memory - * area, in which case the assumption of IOVA as VA mode being - * synonymous with IOVA contiguousness will not hold. */ need_iova_contig_obj = !(mp->flags & MEMPOOL_F_NO_IOVA_CONTIG); diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index ad7cc6ad2e..225bf9fc94 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -491,6 +491,9 @@ typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp, * Number of objects to be added in mempool. 
* @param[in] pg_shift * LOG2 of the physical pages size. If set to 0, ignore page boundaries. + * @param[in] chunk_reserve + * Amount of memory that must be reserved at the beginning of each page, + * or at the beginning of the memory area if pg_shift is 0. * @param[out] min_chunk_size * Location for minimum size of the memory chunk which may be used to * store memory pool objects. @@ -501,7 +504,7 @@ typedef ssize_t (*rte_mempool_calc_mem_size_t)(const struct rte_mempool *mp, */ __rte_experimental ssize_t rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, - uint32_t obj_num, uint32_t pg_shift, + uint32_t obj_num, uint32_t pg_shift, size_t chunk_reserve, size_t *min_chunk_size, size_t *align); /** @@ -509,7 +512,7 @@ ssize_t rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, * objects. * * Equivalent to rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, - * min_chunk_size, align). + * 0, min_chunk_size, align). */ ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp, uint32_t obj_num, uint32_t pg_shift, @@ -563,17 +566,31 @@ typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp, void *vaddr, rte_iova_t iova, size_t len, rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg); +/** + * Align objects on addresses multiple of total_elt_sz. + */ +#define RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ 0x0001 + /** * @warning * @b EXPERIMENTAL: this API may change without prior notice. * * @internal Helper to populate memory pool object using provided memory - * chunk: just slice objects one by one. + * chunk: just slice objects one by one, taking care of not + * crossing page boundaries. + * + * If RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ is set in flags, the addresses + * of object headers will be aligned on a multiple of total_elt_sz. + * This feature is used by octeontx hardware. * * This function is internal to mempool library and mempool drivers. * * @param[in] mp * A pointer to the mempool structure. 
+ * @param[in] flags + * Logical OR of following flags: + * - RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ: align objects on addresses + * multiple of total_elt_sz. * @param[in] max_objs * Maximum number of objects to be added in mempool. * @param[in] vaddr @@ -591,14 +608,14 @@ typedef int (*rte_mempool_populate_t)(struct rte_mempool *mp, */ __rte_experimental int rte_mempool_op_populate_helper(struct rte_mempool *mp, - unsigned int max_objs, + unsigned int flags, unsigned int max_objs, void *vaddr, rte_iova_t iova, size_t len, rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg); /** * Default way to populate memory pool object using provided memory chunk. * - * Equivalent to rte_mempool_op_populate_helper(mp, max_objs, vaddr, iova, + * Equivalent to rte_mempool_op_populate_helper(mp, 0, max_objs, vaddr, iova, * len, obj_cb, obj_cb_arg). */ int rte_mempool_op_populate_default(struct rte_mempool *mp, diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c index 0bfc63497b..e6be7152b6 100644 --- a/lib/librte_mempool/rte_mempool_ops_default.c +++ b/lib/librte_mempool/rte_mempool_ops_default.c @@ -9,6 +9,7 @@ ssize_t rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, uint32_t obj_num, uint32_t pg_shift, + size_t chunk_reserve, size_t *min_chunk_size, size_t *align) { size_t total_elt_sz; @@ -19,10 +20,12 @@ rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, if (total_elt_sz == 0) { mem_size = 0; } else if (pg_shift == 0) { - mem_size = total_elt_sz * obj_num; + mem_size = total_elt_sz * obj_num + chunk_reserve; } else { pg_sz = (size_t)1 << pg_shift; - obj_per_page = pg_sz / total_elt_sz; + if (chunk_reserve >= pg_sz) + return -EINVAL; + obj_per_page = (pg_sz - chunk_reserve) / total_elt_sz; if (obj_per_page == 0) { /* * Note that if object size is bigger than page size, @@ -30,8 +33,8 @@ rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, * of physically continuous pages big enough 
to store * at least one object. */ - mem_size = - RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num; + mem_size = RTE_ALIGN_CEIL(total_elt_sz + chunk_reserve, + pg_sz) * obj_num; } else { /* In the best case, the allocator will return a * page-aligned address. For example, with 5 objs, @@ -42,7 +45,8 @@ rte_mempool_op_calc_mem_size_helper(const struct rte_mempool *mp, */ objs_in_last_page = ((obj_num - 1) % obj_per_page) + 1; /* room required for the last page */ - mem_size = objs_in_last_page * total_elt_sz; + mem_size = objs_in_last_page * total_elt_sz + + chunk_reserve; /* room required for other pages */ mem_size += ((obj_num - objs_in_last_page) / obj_per_page) << pg_shift; @@ -67,24 +71,60 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp, size_t *min_chunk_size, size_t *align) { return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, - min_chunk_size, align); + 0, min_chunk_size, align); +} + +/* Returns -1 if object crosses a page boundary, else returns 0 */ +static int +check_obj_bounds(char *obj, size_t pg_sz, size_t elt_sz) +{ + if (pg_sz == 0) + return 0; + if (elt_sz > pg_sz) + return 0; + if (RTE_PTR_ALIGN(obj, pg_sz) != RTE_PTR_ALIGN(obj + elt_sz - 1, pg_sz)) + return -1; + return 0; } int -rte_mempool_op_populate_helper(struct rte_mempool *mp, unsigned int max_objs, - void *vaddr, rte_iova_t iova, size_t len, - rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg) +rte_mempool_op_populate_helper(struct rte_mempool *mp, unsigned int flags, + unsigned int max_objs, void *vaddr, rte_iova_t iova, + size_t len, rte_mempool_populate_obj_cb_t *obj_cb, + void *obj_cb_arg) { - size_t total_elt_sz; + char *va = vaddr; + size_t total_elt_sz, pg_sz; size_t off; unsigned int i; void *obj; + int ret; + + ret = rte_mempool_get_page_size(mp, &pg_sz); + if (ret < 0) + return ret; total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; - for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) { + if (flags & 
RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ) + off = total_elt_sz - (((uintptr_t)(va - 1) % total_elt_sz) + 1); + else + off = 0; + for (i = 0; i < max_objs; i++) { + /* avoid objects to cross page boundaries */ + if (check_obj_bounds(va + off, pg_sz, total_elt_sz) < 0) { + off += RTE_PTR_ALIGN_CEIL(va + off, pg_sz) - (va + off); + if (flags & RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ) + off += total_elt_sz - + (((uintptr_t)(va + off - 1) % + total_elt_sz) + 1); + } + + if (off + total_elt_sz > len) + break; + off += mp->header_size; - obj = (char *)vaddr + off; + obj = va + off; obj_cb(mp, obj_cb_arg, obj, (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off)); rte_mempool_ops_enqueue_bulk(mp, &obj, 1); @@ -100,6 +140,6 @@ rte_mempool_op_populate_default(struct rte_mempool *mp, unsigned int max_objs, rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg) { - return rte_mempool_op_populate_helper(mp, max_objs, vaddr, iova, + return rte_mempool_op_populate_helper(mp, 0, max_objs, vaddr, iova, len, obj_cb, obj_cb_arg); }