+ unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+ char mz_name[RTE_MEMZONE_NAMESIZE];
+ const struct rte_memzone *mz;
+ ssize_t mem_size;
+ size_t align, pg_sz, pg_shift;
+ rte_iova_t iova;
+ unsigned mz_id, n;
+ int ret;
+ bool no_contig, try_contig, no_pageshift, external;
+
+ ret = mempool_ops_alloc_once(mp);
+ if (ret != 0)
+ return ret;
+
+ /* check if we can retrieve a valid socket ID */
+ ret = rte_malloc_heap_socket_is_external(mp->socket_id);
+ if (ret < 0)
+ return -EINVAL;
+ external = ret;
+
+ /* mempool must not be populated */
+ if (mp->nb_mem_chunks != 0)
+ return -EEXIST;
+
+ no_contig = mp->flags & MEMPOOL_F_NO_IOVA_CONTIG;
+
+ /*
+ * the following section calculates page shift and page size values.
+ *
+ * these values impact the result of calc_mem_size operation, which
+ * returns the amount of memory that should be allocated to store the
+ * desired number of objects. when they are not zero, the returned
+ * amount includes extra memory for the padding between objects, to
+ * ensure that an object does not cross a page boundary. in other words,
+ * page size/shift are to be set to zero if mempool elements won't care
+ * about page boundaries.
+ * there are several considerations for page size and page shift here.
+ *
+ * if we don't need our mempools to have physically contiguous objects,
+ * then just set page shift and page size to 0, because the user has
+ * indicated that there's no need to care about anything.
+ *
+ * if we do need contiguous objects, there is also an option to reserve
+ * the entire mempool memory as one contiguous block of memory, in
+ * which case the page shift and alignment wouldn't matter either.
+ *
+ * if we require contiguous objects, but not necessarily the entire
+ * mempool reserved space to be contiguous, then there are two options.
+ *
+ * if our IO addresses are virtual, not actual physical (IOVA as VA
+ * case), then no page shift needed - our memory allocation will give us
+ * contiguous IO memory as far as the hardware is concerned, so
+ * act as if we're getting contiguous memory.
+ *
+ * if our IO addresses are physical, we may get memory from bigger
+ * pages, or we might get memory from smaller pages, and how much of it
+ * we require depends on whether we want bigger or smaller pages.
+ * However, requesting each and every memory size is too much work, so
+ * what we'll do instead is walk through the page sizes available, pick
+ * the smallest one and set up page shift to match that one. We will be
+ * wasting some space this way, but it's much nicer than looping around
+ * trying to reserve each and every page size.
+ *
+ * However, since size calculation will produce page-aligned sizes, it
+ * makes sense to first try and see if we can reserve the entire memzone
+ * in one contiguous chunk as well (otherwise we might end up wasting a
+ * 1G page on a 10MB memzone). If we fail to get enough contiguous
+ * memory, then we'll go and reserve space page-by-page.
+ *
+ * We also have to take into account the fact that memory that we're
+ * going to allocate from can belong to an externally allocated memory
+ * area, in which case the assumption of IOVA as VA mode being
+ * synonymous with IOVA contiguousness will not hold. We should also try
+ * to go for contiguous memory even if we're in no-huge mode, because
+ * external memory may in fact be IOVA-contiguous.
+ */
+ external = rte_malloc_heap_socket_is_external(mp->socket_id) == 1;
+ no_pageshift = no_contig ||
+ (!external && rte_eal_iova_mode() == RTE_IOVA_VA);
+ try_contig = !no_contig && !no_pageshift &&
+ (rte_eal_has_hugepages() || external);
+
+ if (no_pageshift) {
+ pg_sz = 0;
+ pg_shift = 0;
+ } else if (try_contig) {
+ pg_sz = get_min_page_size(mp->socket_id);
+ pg_shift = rte_bsf32(pg_sz);
+ } else {
+ pg_sz = getpagesize();
+ pg_shift = rte_bsf32(pg_sz);
+ }
+
+ for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
+ size_t min_chunk_size;
+ unsigned int flags;
+
+ if (try_contig || no_pageshift)
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+ 0, &min_chunk_size, &align);
+ else
+ mem_size = rte_mempool_ops_calc_mem_size(mp, n,
+ pg_shift, &min_chunk_size, &align);
+
+ if (mem_size < 0) {
+ ret = mem_size;
+ goto fail;
+ }
+
+ ret = snprintf(mz_name, sizeof(mz_name),
+ RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
+ if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+ ret = -ENAMETOOLONG;
+ goto fail;
+ }