/*
 * NVIDIA GPU PCI device IDs.
 * Each ID is meant to be paired with NVIDIA_GPU_VENDOR_ID inside an
 * RTE_PCI_DEVICE() entry of the driver's PCI match table.
 */
#define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1)
#define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5)
+#define NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID (0x20b8)
#define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7)
#define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236)
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A100_80GB_DEVICE_ID)
},
+ {
+ RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
+ NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID)
+ },
{
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A30_24GB_DEVICE_ID)
/*
 * Single entry of the memory list.
 * For entries created by cuda_mem_alloc(), ptr_orig_d/size_orig describe the
 * raw pfn_cuMemAlloc() allocation, while ptr_d/size describe the aligned
 * region handed out from inside it.
 */
struct mem_entry {
CUdeviceptr ptr_d; /* device address after alignment; pkey is hashed from it */
+ CUdeviceptr ptr_orig_d; /* address returned by pfn_cuMemAlloc() and later given to pfn_cuMemFree(); set equal to ptr_d for CPU_REGISTERED entries */
void *ptr_h;
size_t size; /* size requested by the caller of cuda_mem_alloc() */
+ size_t size_orig; /* size actually allocated: size + align */
struct rte_gpu *dev;
CUcontext ctx;
cuda_ptr_key pkey;
*/
static int
-cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr)
+cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr)
{
CUresult res;
const char *err_string;
/*
 * Allocate memory.
 * The request is padded by align bytes (size_orig) so that an address that
 * is a multiple of align can be chosen inside the allocation afterwards.
 * NOTE(review): size + align is not checked for wrap-around; a huge size
 * would yield a small size_orig — confirm callers bound size.
 */
mem_alloc_list_tail->size = size;
- res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d),
- mem_alloc_list_tail->size);
+ mem_alloc_list_tail->size_orig = size + align;
+
+ res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d),
+ mem_alloc_list_tail->size_orig);
if (res != 0) {
pfn_cuGetErrorString(res, &(err_string));
/* The call checked here is pfn_cuMemAlloc(), so the log names that call. */
- rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s",
+ rte_cuda_log(ERR, "cuMemAlloc failed with %s",
return -rte_errno;
}
+ /*
+  * Align memory address: start from the raw allocation address and, when
+  * align is non-zero and the address is not already a multiple of it,
+  * advance ptr_d to the next multiple of align. ptr_orig_d is left
+  * untouched so the raw allocation can still be released.
+  */
+ mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d;
+ if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align)
+ mem_alloc_list_tail->ptr_d += (align -
+ (((uintptr_t)mem_alloc_list_tail->ptr_d) % align));
+
/* GPUDirect RDMA attribute required */
res = pfn_cuPointerSetAttribute(&flag,
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d);
mem_alloc_list_tail->ptr_h = NULL;
- mem_alloc_list_tail->size = size;
mem_alloc_list_tail->dev = dev;
mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context);
mem_alloc_list_tail->mtype = GPU_MEM;
mem_alloc_list_tail->dev = dev;
mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context);
mem_alloc_list_tail->mtype = CPU_REGISTERED;
+ mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d;
/* Restore original ctx as current ctx */
res = pfn_cuCtxSetCurrent(current_ctx);
}
if (mem_item->mtype == GPU_MEM) {
- res = pfn_cuMemFree(mem_item->ptr_d);
+ res = pfn_cuMemFree(mem_item->ptr_orig_d);
if (res != 0) {
pfn_cuGetErrorString(res, &(err_string));
rte_cuda_log(ERR, "cuMemFree current failed with %s",
dev->ops.mem_free = cuda_mem_free;
dev->ops.mem_register = cuda_mem_register;
dev->ops.mem_unregister = cuda_mem_unregister;
+ dev->ops.mem_cpu_map = NULL;
+ dev->ops.mem_cpu_unmap = NULL;
dev->ops.wmb = cuda_wmb;
rte_gpu_complete_new(dev);