lib/gpudev/rte_gpudev.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright (c) 2021 NVIDIA Corporation & Affiliates
   3  */
   4
   5 #ifndef RTE_GPUDEV_H
   6 #define RTE_GPUDEV_H
   7
   8 #include <stddef.h>
   9 #include <stdint.h>
  10 #include <stdbool.h>
  11
  12 #include <rte_bitops.h>
  13 #include <rte_compat.h>
  14
  15 /**
  16  * @file
  17  * Generic library to interact with GPU computing device.
  18  *
  19  * The API is not thread-safe.
  20  * Device management must be done by a single thread.
  21  *
  22  * @warning
  23  * @b EXPERIMENTAL: this API may change without prior notice.
  24  */
  25
  26 #ifdef __cplusplus
  27 extern "C" {
  28 #endif
  29
  30 /** Default maximum number of devices, used if rte_gpu_init() is not called. */
  31 #define RTE_GPU_DEFAULT_MAX 32
  32
  33 /** Device ID meaning no device (e.g. no parent). */
  34 #define RTE_GPU_ID_NONE -1
  35 /** Wildcard device ID matching any device. */
  36 #define RTE_GPU_ID_ANY INT16_MIN
  37
  38 /** Wildcard user_data matching any registered callback data. */
  39 #define RTE_GPU_CALLBACK_ANY_DATA ((void *)-1)
  40
  41 /** Access an lvalue as volatile (relies on the GNU __typeof__ extension). */
  42 #define RTE_GPU_VOLATILE(x) (*(volatile __typeof__(x) *)&(x))
  43
  44 /** Device information, as filled by rte_gpu_info_get(). */
  45 struct rte_gpu_info {
  46         /** Unique identifier name. */
  47         const char *name;
  48         /** Opaque handler of the device context. */
  49         uint64_t context;
  50         /** Device ID. */
  51         int16_t dev_id;
  52         /** ID of the parent device, RTE_GPU_ID_NONE if no parent. */
  53         int16_t parent;
  54         /** Total processors available on device. */
  55         uint32_t processor_count;
  56         /** Total memory available on device. */
  57         size_t total_memory;
  58         /** Local NUMA memory ID. -1 if unknown. */
  59         int16_t numa_node;
  60 };
  61
  62 /** Device events reported to notification callbacks. */
  63 enum rte_gpu_event {
  64         /** Device has just been initialized. */
  65         RTE_GPU_EVENT_NEW,
  66         /** Device is about to be released. */
  67         RTE_GPU_EVENT_DEL,
  68 };
  69
  70 /** Event callback prototype: gets the device ID, the event and the registered user_data. */
  71 typedef void (rte_gpu_callback_t)(int16_t dev_id,
  72                 enum rte_gpu_event event, void *user_data);
  73
  74 /** Kind of memory in which a communication flag is allocated. */
  75 enum rte_gpu_comm_flag_type {
  76         /** Allocate the flag in CPU memory visible from the device. */
  77         RTE_GPU_COMM_FLAG_CPU = 0,
  78 };
  79
  80 /** Communication flag to coordinate CPU with the device. */
  81 struct rte_gpu_comm_flag {
  82         /** Device using the flag. NOTE(review): uint16_t here, int16_t in other APIs. */
  83         uint16_t dev_id;
  84         /** Pointer to flag memory area. */
  85         uint32_t *ptr;
  86         /** Type of memory used to allocate the flag. */
  87         enum rte_gpu_comm_flag_type mtype;
  88 };
  89
  90 /**
  91  * @warning
  92  * @b EXPERIMENTAL: this API may change without prior notice.
  93  *
  94  * Initialize the device array before probing devices.
  95  * If not called, the maximum number of probed devices is RTE_GPU_DEFAULT_MAX.
  96  *
  97  * @param dev_max
  98  *   Maximum number of devices.
  99  *
 100  * @return
 101  *   0 on success, -rte_errno otherwise:
 102  *   - ENOMEM if out of memory
 103  *   - EINVAL if dev_max is 0
 104  *   - EBUSY if already initialized
 105  */
 106 __rte_experimental
 107 int rte_gpu_init(size_t dev_max);
 108
 109 /**
 110  * @warning
 111  * @b EXPERIMENTAL: this API may change without prior notice.
 112  *
 113  * Return the number of GPUs detected and associated with DPDK.
 114  *
 115  * @return
 116  *   The number of available computing devices.
 117  */
 118 __rte_experimental
 119 uint16_t rte_gpu_count_avail(void);
 120
 121 /**
 122  * @warning
 123  * @b EXPERIMENTAL: this API may change without prior notice.
 124  *
 125  * Check if the device is valid and initialized in DPDK.
 126  *
 127  * @param dev_id
 128  *   Device ID to check.
 129  *
 130  * @return
 131  *   - True if dev_id is a valid and initialized computing device.
 132  *   - False otherwise.
 133  */
 134 __rte_experimental
 135 bool rte_gpu_is_valid(int16_t dev_id);
 136
 137 /**
 138  * @warning
 139  * @b EXPERIMENTAL: this API may change without prior notice.
 140  *
 141  * Create a virtual device representing a context in the parent device.
 142  *
 143  * @param name
 144  *   Unique string to identify the device.
 145  * @param parent
 146  *   Device ID of the parent.
 147  * @param child_context
 148  *   Opaque context handler.
 149  *
 150  * @return
 151  *   Device ID of the newly created child, -rte_errno otherwise:
 152  *   - EINVAL if empty name
 153  *   - ENAMETOOLONG if name is too long
 154  *   - EEXIST if device name already exists
 155  *   - ENODEV if invalid parent
 156  *   - EPERM if secondary process
 157  *   - ENOENT if too many devices
 158  *   - ENOMEM if out of space
 159  */
 160 __rte_experimental
 161 int16_t rte_gpu_add_child(const char *name,
 162                 int16_t parent, uint64_t child_context);
 163
 164 /**
 165  * @warning
 166  * @b EXPERIMENTAL: this API may change without prior notice.
 167  *
 168  * Get the ID of the next valid GPU initialized in DPDK.
 169  *
 170  * @param dev_id
 171  *   The initial device ID to start the search from.
 172  * @param parent
 173  *   The device ID of the parent.
 174  *   RTE_GPU_ID_NONE means no parent.
 175  *   RTE_GPU_ID_ANY means no or any parent.
 176  *
 177  * @return
 178  *   Next device ID corresponding to a valid and initialized computing device,
 179  *   RTE_GPU_ID_NONE if there is none.
 180  */
 181 __rte_experimental
 182 int16_t rte_gpu_find_next(int16_t dev_id, int16_t parent);
 183
 184 /**
 185  * @warning
 186  * @b EXPERIMENTAL: this API may change without prior notice.
 187  *
 188  * Macro to iterate over all valid GPU devices, with or without a parent.
 189  *
 190  * @param dev_id
 191  *   Variable assigned each valid device ID in turn; iteration starts at 0.
 192  */
 193 #define RTE_GPU_FOREACH(dev_id) \
 194         RTE_GPU_FOREACH_CHILD(dev_id, RTE_GPU_ID_ANY)
 195
 196 /**
 197  * @warning
 198  * @b EXPERIMENTAL: this API may change without prior notice.
 199  *
 200  * Macro to iterate over all valid computing devices having no parent.
 201  *
 202  * @param dev_id
 203  *   Variable assigned each valid device ID in turn; iteration starts at 0.
 204  */
 205 #define RTE_GPU_FOREACH_PARENT(dev_id) \
 206         RTE_GPU_FOREACH_CHILD(dev_id, RTE_GPU_ID_NONE)
 207
 208 /**
 209  * @warning
 210  * @b EXPERIMENTAL: this API may change without prior notice.
 211  *
 212  * Macro to iterate over all valid children of a computing device parent.
 213  *
 214  * @param dev_id
 215  *   Variable assigned each valid device ID in turn; iteration starts at 0.
 216  * @param parent
 217  *   The device ID of the parent (RTE_GPU_ID_NONE and RTE_GPU_ID_ANY accepted).
 218  */
 219 #define RTE_GPU_FOREACH_CHILD(dev_id, parent) \
 220         for ((dev_id) = rte_gpu_find_next(0, (parent)); \
 221              (dev_id) >= 0; \
 222              (dev_id) = rte_gpu_find_next((dev_id) + 1, (parent)))
 223
 224 /**
 225  * @warning
 226  * @b EXPERIMENTAL: this API may change without prior notice.
 227  *
 228  * Close device or child context.
 229  * All resources are released.
 230  *
 231  * @param dev_id
 232  *   ID of the device or child context to close.
 233  *
 234  * @return
 235  *   0 on success, -rte_errno otherwise:
 236  *   - ENODEV if invalid dev_id
 237  *   - EPERM if driver error
 238  */
 239 __rte_experimental
 240 int rte_gpu_close(int16_t dev_id);
 241
 242 /**
 243  * @warning
 244  * @b EXPERIMENTAL: this API may change without prior notice.
 245  *
 246  * Register a function as event callback.
 247  * A function may be registered multiple times for different events.
 248  *
 249  * @param dev_id
 250  *   Device ID to get notified about.
 251  *   RTE_GPU_ID_ANY means all devices.
 252  * @param event
 253  *   Device event to be registered for.
 254  * @param function
 255  *   Callback function to be called on event.
 256  * @param user_data
 257  *   Optional opaque pointer passed to the callback as user_data.
 258  *
 259  * @return
 260  *   0 on success, -rte_errno otherwise:
 261  *   - ENODEV if invalid dev_id
 262  *   - EINVAL if NULL function
 263  *   - ENOMEM if out of memory
 264  */
 265 __rte_experimental
 266 int rte_gpu_callback_register(int16_t dev_id, enum rte_gpu_event event,
 267                 rte_gpu_callback_t *function, void *user_data);
 268
 269 /**
 270  * @warning
 271  * @b EXPERIMENTAL: this API may change without prior notice.
 272  *
 273  * Unregister a callback function for an event.
 274  *
 275  * @param dev_id
 276  *   Device ID to be silenced.
 277  *   RTE_GPU_ID_ANY means all devices.
 278  * @param event
 279  *   Registered event.
 280  * @param function
 281  *   Registered function.
 282  * @param user_data
 283  *   The user_data value given at registration.
 284  *   RTE_GPU_CALLBACK_ANY_DATA is a catch-all.
 285  *
 286  * @return
 287  *   0 on success, -rte_errno otherwise:
 288  *   - ENODEV if invalid dev_id
 289  *   - EINVAL if NULL function
 290  */
 291 __rte_experimental
 292 int rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
 293                 rte_gpu_callback_t *function, void *user_data);
 294
 295 /**
 296  * @warning
 297  * @b EXPERIMENTAL: this API may change without prior notice.
 298  *
 299  * Return device specific info.
 300  *
 301  * @param dev_id
 302  *   Device ID to get info about.
 303  * @param info
 304  *   Structure to be filled with the device info.
 305  *
 306  * @return
 307  *   0 on success, -rte_errno otherwise:
 308  *   - ENODEV if invalid dev_id
 309  *   - EINVAL if NULL info
 310  *   - EPERM if driver error
 311  */
 312 __rte_experimental
 313 int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info);
 314
 315 /**
 316  * @warning
 317  * @b EXPERIMENTAL: this API may change without prior notice.
 318  *
 319  * Allocate a chunk of memory in the device.
 320  *
 321  * @param dev_id
 322  *   Device ID requiring allocated memory.
 323  * @param size
 324  *   Number of bytes to allocate.
 325  *   Requesting 0 will do nothing.
 326  *
 327  * @return
 328  *   A pointer to the allocated memory, otherwise NULL and rte_errno is set:
 329  *   - ENODEV if invalid dev_id
 330  *   - EINVAL if reserved flags (NOTE(review): no flags parameter here, confirm)
 331  *   - ENOTSUP if operation not supported by the driver
 332  *   - E2BIG if size is higher than limit
 333  *   - ENOMEM if out of space
 334  *   - EPERM if driver error
 335  */
 336 __rte_experimental
 337 void *rte_gpu_mem_alloc(int16_t dev_id, size_t size)
 338 __rte_alloc_size(2);
 339
 340 /**
 341  * @warning
 342  * @b EXPERIMENTAL: this API may change without prior notice.
 343  *
 344  * Deallocate a chunk of memory allocated with rte_gpu_mem_alloc().
 345  *
 346  * @param dev_id
 347  *   Reference device ID.
 348  * @param ptr
 349  *   Pointer to the memory area to be deallocated.
 350  *   NULL is accepted and is a no-op.
 351  *
 352  * @return
 353  *   0 on success, -rte_errno otherwise:
 354  *   - ENODEV if invalid dev_id
 355  *   - ENOTSUP if operation not supported by the driver
 356  *   - EPERM if driver error
 357  */
 358 __rte_experimental
 359 int rte_gpu_mem_free(int16_t dev_id, void *ptr);
 360
 361 /**
 362  * @warning
 363  * @b EXPERIMENTAL: this API may change without prior notice.
 364  *
 365  * Register a chunk of memory on the CPU usable by the device.
 366  *
 367  * @param dev_id
 368  *   Device ID that will use the registered memory.
 369  * @param size
 370  *   Number of bytes to register.
 371  *   Requesting 0 will do nothing.
 372  * @param ptr
 373  *   Pointer to the memory area to be registered.
 374  *   NULL is a no-op accepted value.
 375  *
 376  * @return
 377  *   0 on success, -rte_errno otherwise:
 378  *   - ENODEV if invalid dev_id
 379  *   - EINVAL if reserved flags
 380  *   - ENOTSUP if operation not supported by the driver
 381  *   - E2BIG if size is higher than limit
 382  *   - ENOMEM if out of space
 383  *   - EPERM if driver error
 384  */
 385 __rte_experimental
 386 int rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr);
 387
 388 /**
 389  * @warning
 390  * @b EXPERIMENTAL: this API may change without prior notice.
 391  *
 392  * Deregister a chunk of memory previously registered with rte_gpu_mem_register().
 393  *
 394  * @param dev_id
 395  *   Reference device ID.
 396  * @param ptr
 397  *   Pointer to the memory area to be unregistered.
 398  *   NULL is a no-op accepted value.
 399  *
 400  * @return
 401  *   0 on success, -rte_errno otherwise:
 402  *   - ENODEV if invalid dev_id
 403  *   - ENOTSUP if operation not supported by the driver
 404  *   - EPERM if driver error
 405  */
 406 __rte_experimental
 407 int rte_gpu_mem_unregister(int16_t dev_id, void *ptr);
 408
 409 /**
 410  * @warning
 411  * @b EXPERIMENTAL: this API may change without prior notice.
 412  *
 413  * Enforce a GPU write memory barrier.
 414  *
 415  * @param dev_id
 416  *   Device ID of the GPU the barrier applies to.
 417  *
 418  * @return
 419  *   0 on success, -rte_errno otherwise:
 420  *   - ENODEV if invalid dev_id
 421  *   - ENOTSUP if operation not supported by the driver
 422  *   - EPERM if driver error
 423  */
 424 __rte_experimental
 425 int rte_gpu_wmb(int16_t dev_id);
 426
 427 /**
 428  * @warning
 429  * @b EXPERIMENTAL: this API may change without prior notice.
 430  *
 431  * Create a communication flag that can be shared
 432  * between CPU threads and device workload to exchange some status info
 433  * (e.g. work is done, processing can start, etc.).
 434  *
 435  * @param dev_id
 436  *   Reference device ID.
 437  * @param devflag
 438  *   Pointer to the memory area of the devflag structure.
 439  * @param mtype
 440  *   Type of memory in which to allocate the communication flag.
 441  *
 442  * @return
 443  *   0 on success, -rte_errno otherwise:
 444  *   - ENODEV if invalid dev_id
 445  *   - EINVAL if invalid inputs
 446  *   - ENOTSUP if operation not supported by the driver
 447  *   - ENOMEM if out of space
 448  *   - EPERM if driver error
 449  */
 450 __rte_experimental
 451 int rte_gpu_comm_create_flag(uint16_t dev_id,
 452                 struct rte_gpu_comm_flag *devflag,
 453                 enum rte_gpu_comm_flag_type mtype);
 454
 455 /**
 456  * @warning
 457  * @b EXPERIMENTAL: this API may change without prior notice.
 458  *
 459  * Deallocate a communication flag.
 460  *
 461  * @param devflag
 462  *   Pointer to the memory area of the devflag structure.
 463  *
 464  * @return
 465  *   0 on success, -rte_errno otherwise:
 466  *   - ENODEV if invalid dev_id (presumably devflag->dev_id; confirm)
 467  *   - EINVAL if NULL devflag
 468  *   - ENOTSUP if operation not supported by the driver
 469  *   - EPERM if driver error
 470  */
 471 __rte_experimental
 472 int rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag);
 473
 474 /**
 475  * @warning
 476  * @b EXPERIMENTAL: this API may change without prior notice.
 477  *
 478  * Set a communication flag to the input value.
 479  * Flag memory area is treated as volatile.
 480  * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
 481  *
 482  * @param devflag
 483  *   Pointer to the memory area of the devflag structure.
 484  * @param val
 485  *   Value to set in the flag.
 486  *
 487  * @return
 488  *   0 on success, -rte_errno otherwise:
 489  *   - EINVAL if invalid input params
 490  */
 491 __rte_experimental
 492 int rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag,
 493                 uint32_t val);
 494
 495 /**
 496  * @warning
 497  * @b EXPERIMENTAL: this API may change without prior notice.
 498  *
 499  * Get the value of the communication flag.
 500  * Flag memory area is treated as volatile.
 501  * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
 502  *
 503  * @param devflag
 504  *   Pointer to the memory area of the devflag structure.
 505  * @param val
 506  *   Output location receiving the flag value.
 507  *
 508  * @return
 509  *   0 on success, -rte_errno otherwise:
 510  *   - EINVAL if invalid input params
 511  */
 512 __rte_experimental
 513 int rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag,
 514                 uint32_t *val);
 515
 516 #ifdef __cplusplus
 517 }
 518 #endif
 519
 520 #endif /* RTE_GPUDEV_H */