From: Elena Agostini Date: Sat, 8 Jan 2022 00:20:01 +0000 (+0000) Subject: gpudev: add alignment for memory allocation X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=c8557ed434213fe11ecd867819823ea6865311d2;p=dpdk.git gpudev: add alignment for memory allocation Similarly to rte_malloc, rte_gpu_mem_alloc accepts as input the memory alignment size. GPU driver should return GPU memory address aligned with the input value. Signed-off-by: Elena Agostini --- diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c index 5c1aa3d52f..4500a8660b 100644 --- a/app/test-gpudev/main.c +++ b/app/test-gpudev/main.c @@ -68,12 +68,13 @@ alloc_gpu_memory(uint16_t gpu_id) void *ptr_1 = NULL; void *ptr_2 = NULL; size_t buf_bytes = 1024; + unsigned int align = 4096; int ret; printf("\n=======> TEST: Allocate GPU memory\n\n"); - /* Alloc memory on GPU 0 */ - ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes); + /* Alloc memory on GPU 0 without any specific alignment */ + ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes, 0); if (ptr_1 == NULL) { fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n"); goto error; @@ -81,7 +82,8 @@ alloc_gpu_memory(uint16_t gpu_id) printf("GPU memory allocated at 0x%p size is %zd bytes\n", ptr_1, buf_bytes); - ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes); + /* Alloc memory on GPU 0 with 4kB alignment */ + ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes, align); if (ptr_2 == NULL) { fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n"); goto error; @@ -89,6 +91,11 @@ alloc_gpu_memory(uint16_t gpu_id) printf("GPU memory allocated at 0x%p size is %zd bytes\n", ptr_2, buf_bytes); + if (((uintptr_t)ptr_2) % align) { + fprintf(stderr, "Memory address 0x%p is not aligned to %u\n", ptr_2, align); + goto error; + } + ret = rte_gpu_mem_free(gpu_id, (uint8_t *)(ptr_1)+0x700); if (ret < 0) { printf("GPU memory 0x%p NOT freed: GPU driver didn't find this memory address internally.\n", diff --git a/drivers/gpu/cuda/cuda.c b/drivers/gpu/cuda/cuda.c 
index fa2302621e..0ece1bb612 100644 --- a/drivers/gpu/cuda/cuda.c +++ b/drivers/gpu/cuda/cuda.c @@ -144,8 +144,10 @@ typedef uintptr_t cuda_ptr_key; /* Single entry of the memory list */ struct mem_entry { CUdeviceptr ptr_d; + CUdeviceptr ptr_orig_d; void *ptr_h; size_t size; + size_t size_orig; struct rte_gpu *dev; CUcontext ctx; cuda_ptr_key pkey; @@ -576,7 +578,7 @@ cuda_dev_info_get(struct rte_gpu *dev, struct rte_gpu_info *info) */ static int -cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) +cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr) { CUresult res; const char *err_string; @@ -617,8 +619,10 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) /* Allocate memory */ mem_alloc_list_tail->size = size; - res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d), - mem_alloc_list_tail->size); + mem_alloc_list_tail->size_orig = size + align; + + res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d), + mem_alloc_list_tail->size_orig); if (res != 0) { pfn_cuGetErrorString(res, &(err_string)); rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s", @@ -627,6 +631,12 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) return -rte_errno; } + /* Align memory address */ + mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d; + if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align) + mem_alloc_list_tail->ptr_d += (align - + (((uintptr_t)mem_alloc_list_tail->ptr_d) % align)); + /* GPUDirect RDMA attribute required */ res = pfn_cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, @@ -641,7 +651,6 @@ cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr) mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d); mem_alloc_list_tail->ptr_h = NULL; - mem_alloc_list_tail->size = size; mem_alloc_list_tail->dev = dev; mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); mem_alloc_list_tail->mtype = GPU_MEM; @@ -768,6 +777,7 @@ 
cuda_mem_register(struct rte_gpu *dev, size_t size, void *ptr) mem_alloc_list_tail->dev = dev; mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context); mem_alloc_list_tail->mtype = CPU_REGISTERED; + mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d; /* Restore original ctx as current ctx */ res = pfn_cuCtxSetCurrent(current_ctx); @@ -803,7 +813,7 @@ cuda_mem_free(struct rte_gpu *dev, void *ptr) } if (mem_item->mtype == GPU_MEM) { - res = pfn_cuMemFree(mem_item->ptr_d); + res = pfn_cuMemFree(mem_item->ptr_orig_d); if (res != 0) { pfn_cuGetErrorString(res, &(err_string)); rte_cuda_log(ERR, "cuMemFree current failed with %s", diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c index 9ae36dbae9..59e2169292 100644 --- a/lib/gpudev/gpudev.c +++ b/lib/gpudev/gpudev.c @@ -527,7 +527,7 @@ rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info) } void * -rte_gpu_mem_alloc(int16_t dev_id, size_t size) +rte_gpu_mem_alloc(int16_t dev_id, size_t size, unsigned int align) { struct rte_gpu *dev; void *ptr; @@ -549,7 +549,13 @@ rte_gpu_mem_alloc(int16_t dev_id, size_t size) if (size == 0) /* dry-run */ return NULL; - ret = dev->ops.mem_alloc(dev, size, &ptr); + if (align && !rte_is_power_of_2(align)) { + GPU_LOG(ERR, "requested alignment is not a power of two %u", align); + rte_errno = EINVAL; + return NULL; + } + + ret = dev->ops.mem_alloc(dev, size, align, &ptr); switch (ret) { case 0: diff --git a/lib/gpudev/gpudev_driver.h b/lib/gpudev/gpudev_driver.h index cb7b101f2f..0ed7478e9b 100644 --- a/lib/gpudev/gpudev_driver.h +++ b/lib/gpudev/gpudev_driver.h @@ -27,7 +27,7 @@ enum rte_gpu_state { struct rte_gpu; typedef int (rte_gpu_close_t)(struct rte_gpu *dev); typedef int (rte_gpu_info_get_t)(struct rte_gpu *dev, struct rte_gpu_info *info); -typedef int (rte_gpu_mem_alloc_t)(struct rte_gpu *dev, size_t size, void **ptr); +typedef int (rte_gpu_mem_alloc_t)(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr); typedef int 
(rte_gpu_mem_free_t)(struct rte_gpu *dev, void *ptr); typedef int (rte_gpu_mem_register_t)(struct rte_gpu *dev, size_t size, void *ptr); typedef int (rte_gpu_mem_unregister_t)(struct rte_gpu *dev, void *ptr); diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h index fa3f3aad4f..ff3ca78c89 100644 --- a/lib/gpudev/rte_gpudev.h +++ b/lib/gpudev/rte_gpudev.h @@ -364,18 +364,23 @@ int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info); * @param size * Number of bytes to allocate. * Requesting 0 will do nothing. + * @param align + * If 0, the return is a pointer that is suitably aligned + * for any kind of variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. + * In this case, *align* must be a power of two. * * @return * A pointer to the allocated memory, otherwise NULL and rte_errno is set: * - ENODEV if invalid dev_id - * - EINVAL if reserved flags + * - EINVAL if align is not a power of two * - ENOTSUP if operation not supported by the driver * - E2BIG if size is higher than limit * - ENOMEM if out of space * - EPERM if driver error */ __rte_experimental -void *rte_gpu_mem_alloc(int16_t dev_id, size_t size) +void *rte_gpu_mem_alloc(int16_t dev_id, size_t size, unsigned int align) __rte_alloc_size(2); /**