/* NVIDIA GPU device IDs */
#define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1)
#define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5)
+#define NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID (0x20b8)
#define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7)
#define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236)
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A100_80GB_DEVICE_ID)
},
+ {
+ RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
+ NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID)
+ },
{
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A30_24GB_DEVICE_ID)
/* Single entry of the memory list */
struct mem_entry {
CUdeviceptr ptr_d;
+ CUdeviceptr ptr_orig_d;
void *ptr_h;
size_t size;
+ size_t size_orig;
struct rte_gpu *dev;
CUcontext ctx;
cuda_ptr_key pkey;
return -EINVAL;
/* if key is in head */
- if (mem_alloc_list_cur->prev == NULL)
+ if (mem_alloc_list_cur->prev == NULL) {
mem_alloc_list_head = mem_alloc_list_cur->next;
- else {
+ if (mem_alloc_list_head != NULL)
+ mem_alloc_list_head->prev = NULL;
+ } else {
mem_alloc_list_cur->prev->next = mem_alloc_list_cur->next;
if (mem_alloc_list_cur->next != NULL)
mem_alloc_list_cur->next->prev = mem_alloc_list_cur->prev;
*/
static int
-cuda_mem_alloc(struct rte_gpu *dev, size_t size, void **ptr)
+cuda_mem_alloc(struct rte_gpu *dev, size_t size, unsigned int align, void **ptr)
{
CUresult res;
const char *err_string;
/* Allocate memory */
mem_alloc_list_tail->size = size;
- res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_d),
- mem_alloc_list_tail->size);
+ mem_alloc_list_tail->size_orig = size + align;
+
+ res = pfn_cuMemAlloc(&(mem_alloc_list_tail->ptr_orig_d),
+ mem_alloc_list_tail->size_orig);
if (res != 0) {
pfn_cuGetErrorString(res, &(err_string));
rte_cuda_log(ERR, "cuCtxSetCurrent current failed with %s",
return -rte_errno;
}
+ /* Align memory address */
+ mem_alloc_list_tail->ptr_d = mem_alloc_list_tail->ptr_orig_d;
+ if (align && ((uintptr_t)mem_alloc_list_tail->ptr_d) % align)
+ mem_alloc_list_tail->ptr_d += (align -
+ (((uintptr_t)mem_alloc_list_tail->ptr_d) % align));
+
/* GPUDirect RDMA attribute required */
res = pfn_cuPointerSetAttribute(&flag,
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
mem_alloc_list_tail->pkey = get_hash_from_ptr((void *)mem_alloc_list_tail->ptr_d);
mem_alloc_list_tail->ptr_h = NULL;
- mem_alloc_list_tail->size = size;
mem_alloc_list_tail->dev = dev;
mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context);
mem_alloc_list_tail->mtype = GPU_MEM;
mem_alloc_list_tail->dev = dev;
mem_alloc_list_tail->ctx = (CUcontext)((uintptr_t)dev->mpshared->info.context);
mem_alloc_list_tail->mtype = CPU_REGISTERED;
+ mem_alloc_list_tail->ptr_orig_d = mem_alloc_list_tail->ptr_d;
/* Restore original ctx as current ctx */
res = pfn_cuCtxSetCurrent(current_ctx);
}
if (mem_item->mtype == GPU_MEM) {
- res = pfn_cuMemFree(mem_item->ptr_d);
+ res = pfn_cuMemFree(mem_item->ptr_orig_d);
if (res != 0) {
pfn_cuGetErrorString(res, &(err_string));
rte_cuda_log(ERR, "cuMemFree current failed with %s",