#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
+#include <rte_tailq.h>
#include "rte_mempool.h"
return new_obj_size * RTE_MEMPOOL_ALIGN;
}
+struct pagesz_walk_arg { /* state passed to find_min_pagesz() through its void *arg */
+ int socket_id; /* socket ID to match; SOCKET_ID_ANY accepts only non-external lists */
+ size_t min; /* smallest qualifying page_sz so far; get_min_page_size() seeds it with SIZE_MAX */
+};
+
static int
find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
{
- size_t *min = arg;
+ struct pagesz_walk_arg *wa = arg; /* socket filter and running minimum */
+ bool valid;
- if (msl->page_sz < *min)
- *min = msl->page_sz;
+ /*
+ * callback handed to rte_memseg_list_walk(): lowers wa->min to
+ * msl->page_sz when this list qualifies for wa->socket_id and its
+ * page size is smaller than the current minimum. always returns 0.
+ *
+ * we need to only look at page sizes available for a particular socket
+ * ID. so, we either need an exact match on socket ID (can match both
+ * native and external memory), or, if SOCKET_ID_ANY was specified as a
+ * socket ID argument, we must only look at native memory and ignore any
+ * page sizes associated with external memory.
+ */
+ valid = msl->socket_id == wa->socket_id;
+ valid |= wa->socket_id == SOCKET_ID_ANY && msl->external == 0;
+
+ if (valid && msl->page_sz < wa->min)
+ wa->min = msl->page_sz;
return 0;
}
static size_t
-get_min_page_size(void)
+get_min_page_size(int socket_id)
{
- size_t min_pagesz = SIZE_MAX;
+ struct pagesz_walk_arg wa; /* state shared with find_min_pagesz() */
+
+ wa.min = SIZE_MAX; /* sentinel: no qualifying memseg list seen yet */
+ wa.socket_id = socket_id;
- rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+ rte_memseg_list_walk(find_min_pagesz, &wa); /* callback lowers wa.min per qualifying list */
- return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+ return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min; /* sentinel untouched: fall back to getpagesize() */
}
return sz->total_size;
}
-
-/*
- * Internal function to calculate required memory chunk size.
- */
-size_t
-rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift)
-{
- size_t obj_per_page, pg_num, pg_sz;
-
- if (total_elt_sz == 0)
- return 0;
-
- if (pg_shift == 0)
- return total_elt_sz * elt_num;
-
- pg_sz = (size_t)1 << pg_shift;
- obj_per_page = pg_sz / total_elt_sz;
- if (obj_per_page == 0)
- return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;
-
- pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
- return pg_num << pg_shift;
-}
-
/* free a memchunk allocated with rte_memzone_reserve() */
static void
rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
else
- off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
+ off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_MEMPOOL_ALIGN) - vaddr;
if (off > len) {
ret = -EINVAL;
return ret;
}
+static rte_iova_t
+get_iova(void *addr)
+{
+ struct rte_memseg *ms;
+
+ /*
+ * resolve the IO address to use for addr.
+ * try registered memory first
+ */
+ ms = rte_mem_virt2memseg(addr, NULL);
+ if (ms == NULL || ms->iova == RTE_BAD_IOVA)
+ /*
+ * no memseg was returned, or its iova is RTE_BAD_IOVA:
+ * fall back to actual physical address
+ */
+ return rte_mem_virt2iova(addr);
+ return ms->iova + RTE_PTR_DIFF(addr, ms->addr); /* memseg iova plus the distance of addr from ms->addr */
+}
+
/* Populate the mempool with a virtual area. Return the number of
* objects added, or a negative value on error.
*/
size_t off, phys_len;
int ret, cnt = 0;
- /* address and len must be page-aligned */
- if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr)
- return -EINVAL;
- if (RTE_ALIGN_CEIL(len, pg_sz) != len)
- return -EINVAL;
-
if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA,
len, free_cb, opaque);
- for (off = 0; off + pg_sz <= len &&
+ for (off = 0; off < len &&
mp->populated_size < mp->size; off += phys_len) {
- iova = rte_mem_virt2iova(addr + off);
-
- if (iova == RTE_BAD_IOVA && rte_eal_has_hugepages()) {
- ret = -EINVAL;
- goto fail;
- }
+ iova = get_iova(addr + off);
/* populate with the largest group of contiguous pages */
- for (phys_len = pg_sz; off + phys_len < len; phys_len += pg_sz) {
+ for (phys_len = RTE_MIN(
+ (size_t)(RTE_PTR_ALIGN_CEIL(addr + off + 1, pg_sz) -
+ (addr + off)),
+ len - off);
+ off + phys_len < len;
+ phys_len = RTE_MIN(phys_len + pg_sz, len - off)) {
rte_iova_t iova_tmp;
- iova_tmp = rte_mem_virt2iova(addr + off + phys_len);
+ iova_tmp = get_iova(addr + off + phys_len);
- if (iova_tmp != iova + phys_len)
+ if (iova_tmp == RTE_BAD_IOVA ||
+ iova_tmp != iova + phys_len)
break;
}
return ret;
}
+/*
+ * Get the minimal page size used in a mempool before populating it.
+ *
+ * On success, returns 0 and stores one of the following in *pg_sz:
+ *  - 0 when MEMPOOL_F_NO_IOVA_CONTIG is set in mp->flags;
+ *  - get_min_page_size(mp->socket_id) when rte_eal_has_hugepages() is
+ *    non-zero or rte_malloc_heap_socket_is_external() returned 1 for
+ *    mp->socket_id;
+ *  - getpagesize() otherwise.
+ *
+ * Returns -EINVAL, without writing *pg_sz, when
+ * rte_malloc_heap_socket_is_external() returns a negative value.
+ */
+int
+rte_mempool_get_page_size(struct rte_mempool *mp, size_t *pg_sz)
+{
+ bool need_iova_contig_obj;
+ bool alloc_in_ext_mem;
+ int ret;
+
+ /* check if we can retrieve a valid socket ID */
+ ret = rte_malloc_heap_socket_is_external(mp->socket_id);
+ if (ret < 0)
+ return -EINVAL;
+ alloc_in_ext_mem = (ret == 1); /* a return of 1 is taken to mean an external heap */
+ need_iova_contig_obj = !(mp->flags & MEMPOOL_F_NO_IOVA_CONTIG);
+
+ if (!need_iova_contig_obj)
+ *pg_sz = 0; /* the caller in this file treats 0 as "no page shift" */
+ else if (rte_eal_has_hugepages() || alloc_in_ext_mem)
+ *pg_sz = get_min_page_size(mp->socket_id);
+ else
+ *pg_sz = getpagesize();
+
+ return 0;
+}
+
/* Default function to populate the mempool: allocate memory in memzones,
* and populate them. Return the number of objects added, or a negative
* value on error.
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
ssize_t mem_size;
- size_t align, pg_sz, pg_shift;
+ size_t align, pg_sz, pg_shift = 0;
rte_iova_t iova;
unsigned mz_id, n;
int ret;
- bool no_contig, try_contig, no_pageshift;
+ bool need_iova_contig_obj;
ret = mempool_ops_alloc_once(mp);
if (ret != 0)
if (mp->nb_mem_chunks != 0)
return -EEXIST;
- no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG;
-
/*
* the following section calculates page shift and page size values.
*
* then just set page shift and page size to 0, because the user has
* indicated that there's no need to care about anything.
*
- * if we do need contiguous objects, there is also an option to reserve
- * the entire mempool memory as one contiguous block of memory, in
- * which case the page shift and alignment wouldn't matter as well.
+ * if we do need contiguous objects (if a mempool driver has its
+ * own calc_size() method returning min_chunk_size = mem_size),
+ * there is also an option to reserve the entire mempool memory
+ * as one contiguous block of memory.
*
* if we require contiguous objects, but not necessarily the entire
- * mempool reserved space to be contiguous, then there are two options.
- *
- * if our IO addresses are virtual, not actual physical (IOVA as VA
- * case), then no page shift needed - our memory allocation will give us
- * contiguous IO memory as far as the hardware is concerned, so
- * act as if we're getting contiguous memory.
+ * mempool reserved space to be contiguous, pg_sz will be != 0,
+ * and the default ops->populate() will take care of not placing
+ * objects across pages.
*
* if our IO addresses are physical, we may get memory from bigger
* pages, or we might get memory from smaller pages, and how much of it
* wasting some space this way, but it's much nicer than looping around
* trying to reserve each and every page size.
*
- * However, since size calculation will produce page-aligned sizes, it
- * makes sense to first try and see if we can reserve the entire memzone
- * in one contiguous chunk as well (otherwise we might end up wasting a
- * 1G page on a 10MB memzone). If we fail to get enough contiguous
- * memory, then we'll go and reserve space page-by-page.
+ * If we fail to get enough contiguous memory, then we'll go and
+ * reserve space in smaller chunks.
*/
- no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA;
- try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages();
-
- if (no_pageshift) {
- pg_sz = 0;
- pg_shift = 0;
- } else if (try_contig) {
- pg_sz = get_min_page_size();
- pg_shift = rte_bsf32(pg_sz);
- } else {
- pg_sz = getpagesize();
+
+ need_iova_contig_obj = !(mp->flags & MEMPOOL_F_NO_IOVA_CONTIG);
+ ret = rte_mempool_get_page_size(mp, &pg_sz);
+ if (ret < 0)
+ return ret;
+
+ if (pg_sz != 0)
pg_shift = rte_bsf32(pg_sz);
- }
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
size_t min_chunk_size;
- unsigned int flags;
- if (try_contig || no_pageshift)
- mem_size = rte_mempool_ops_calc_mem_size(mp, n,
- 0, &min_chunk_size, &align);
- else
- mem_size = rte_mempool_ops_calc_mem_size(mp, n,
- pg_shift, &min_chunk_size, &align);
+ mem_size = rte_mempool_ops_calc_mem_size(
+ mp, n, pg_shift, &min_chunk_size, &align);
if (mem_size < 0) {
ret = mem_size;
goto fail;
}
- flags = mz_flags;
-
/* if we're trying to reserve contiguous memory, add appropriate
* memzone flag.
*/
- if (try_contig)
- flags |= RTE_MEMZONE_IOVA_CONTIG;
+ if (min_chunk_size == (size_t)mem_size)
+ mz_flags |= RTE_MEMZONE_IOVA_CONTIG;
mz = rte_memzone_reserve_aligned(mz_name, mem_size,
- mp->socket_id, flags, align);
+ mp->socket_id, mz_flags, align);
- /* if we were trying to allocate contiguous memory, failed and
- * minimum required contiguous chunk fits minimum page, adjust
- * memzone size to the page size, and try again.
- */
- if (mz == NULL && try_contig && min_chunk_size <= pg_sz) {
- try_contig = false;
- flags &= ~RTE_MEMZONE_IOVA_CONTIG;
-
- mem_size = rte_mempool_ops_calc_mem_size(mp, n,
- pg_shift, &min_chunk_size, &align);
- if (mem_size < 0) {
- ret = mem_size;
- goto fail;
- }
-
- mz = rte_memzone_reserve_aligned(mz_name, mem_size,
- mp->socket_id, flags, align);
- }
/* don't try reserving with 0 size if we were asked to reserve
* IOVA-contiguous memory.
*/
* have
*/
mz = rte_memzone_reserve_aligned(mz_name, 0,
- mp->socket_id, flags,
- RTE_MAX(pg_sz, align));
+ mp->socket_id, mz_flags, align);
}
if (mz == NULL) {
ret = -rte_errno;
goto fail;
}
- if (no_contig)
- iova = RTE_BAD_IOVA;
- else
+ if (need_iova_contig_obj)
iova = mz->iova;
+ else
+ iova = RTE_BAD_IOVA;
- if (no_pageshift || try_contig)
+ if (pg_sz == 0 || (mz_flags & RTE_MEMZONE_IOVA_CONTIG))
ret = rte_mempool_populate_iova(mp, mz->addr,
iova, mz->len,
rte_mempool_memchunk_mz_free,
(void *)(uintptr_t)mz);
else
ret = rte_mempool_populate_virt(mp, mz->addr,
- RTE_ALIGN_FLOOR(mz->len, pg_sz), pg_sz,
+ mz->len, pg_sz,
rte_mempool_memchunk_mz_free,
(void *)(uintptr_t)mz);
if (ret < 0) {
return;
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
- rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+ rte_mcfg_tailq_write_lock();
/* find out tailq entry */
TAILQ_FOREACH(te, mempool_list, next) {
if (te->data == (void *)mp)
TAILQ_REMOVE(mempool_list, te, next);
rte_free(te);
}
- rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ rte_mcfg_tailq_write_unlock();
rte_mempool_free_memchunks(mp);
rte_mempool_ops_free(mp);
return NULL;
}
- rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_write_lock();
/*
* reserve a memory zone for this mempool: private data is
/* init the mempool structure */
mp = mz->addr;
memset(mp, 0, MEMPOOL_HEADER_SIZE(mp, cache_size));
- ret = snprintf(mp->name, sizeof(mp->name), "%s", name);
+ ret = strlcpy(mp->name, name, sizeof(mp->name));
if (ret < 0 || ret >= (int)sizeof(mp->name)) {
rte_errno = ENAMETOOLONG;
goto exit_unlock;
te->data = mp;
- rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+ rte_mcfg_tailq_write_lock();
TAILQ_INSERT_TAIL(mempool_list, te, next);
- rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
- rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_tailq_write_unlock();
+ rte_mcfg_mempool_write_unlock();
return mp;
exit_unlock:
- rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_write_unlock();
rte_free(te);
rte_mempool_free(mp);
return NULL;
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
- rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_lock();
TAILQ_FOREACH(te, mempool_list, next) {
mp = (struct rte_mempool *) te->data;
rte_mempool_dump(f, mp);
}
- rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_unlock();
}
/* search a mempool from its name */
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
- rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_lock();
TAILQ_FOREACH(te, mempool_list, next) {
mp = (struct rte_mempool *) te->data;
break;
}
- rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_unlock();
if (te == NULL) {
rte_errno = ENOENT;
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
- rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_lock();
TAILQ_FOREACH_SAFE(te, mempool_list, next, tmp_te) {
(*func)((struct rte_mempool *) te->data, arg);
}
- rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+ rte_mcfg_mempool_read_unlock();
}