gpudev: add communication list

author Elena Agostini <eagostini@nvidia.com>

Mon, 8 Nov 2021 18:58:04 +0000 (18:58 +0000)

committer Thomas Monjalon <thomas@monjalon.net>

Mon, 8 Nov 2021 16:20:53 +0000 (17:20 +0100)
author Elena Agostini <eagostini@nvidia.com>
Mon, 8 Nov 2021 18:58:04 +0000 (18:58 +0000)
committer Thomas Monjalon <thomas@monjalon.net>
Mon, 8 Nov 2021 16:20:53 +0000 (17:20 +0100)
diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c

index 3b371741f263501f0d2b6356c7e14953918d19c2..1db3f69e10f9a2390d810cfd9f6c03481e3ecbee 100644 (file)
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -209,6 +209,99 @@ create_update_comm_flag(uint16_t gpu_id)
         return 0;
  }
  
+/*
+ * Stand-in for a GPU task: walk the first num_pkts entries of the item
+ * (without touching them) and then flag the item as DONE.
+ * Returns 0 on success, -1 when comm_list_item is NULL.
+ */
+static int
+simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
+{
+       int pkt = 0;
+
+       if (comm_list_item == NULL)
+               return -1;
+
+       while (pkt < num_pkts) {
+               /* A real task would consume comm_list_item->pkt_list[pkt].addr here. */
+               pkt++;
+       }
+
+       comm_list_item->status = RTE_GPU_COMM_LIST_DONE;
+       return 0;
+}
+
+/*
+ * Exercise the communication list API: create a list, populate its first
+ * item with fake mbufs, check that cleanup is refused while the item is
+ * still READY, simulate the GPU consuming the packets, then clean up the
+ * item and destroy the list.
+ * Returns 0 when the test passes, -1 otherwise. The fake mbufs and the
+ * list are released on every exit path.
+ */
+static int
+create_update_comm_list(uint16_t gpu_id)
+{
+       int ret = 0;
+       int i = 0;
+       int test_result = -1;
+       struct rte_gpu_comm_list *comm_list;
+       uint32_t num_comm_items = 1024;
+       struct rte_mbuf *mbufs[10] = { NULL };
+
+       printf("\n=======> TEST: Communication list\n");
+
+       comm_list = rte_gpu_comm_create_list(gpu_id, num_comm_items);
+       if (comm_list == NULL) {
+               /* Failure is signalled by the NULL return; ret holds no error code here. */
+               fprintf(stderr, "rte_gpu_comm_create_list returned error\n");
+               return -1;
+       }
+
+       /**
+        * Simulate DPDK receive functions like rte_eth_rx_burst()
+        */
+       for (i = 0; i < 10; i++) {
+               /* rte_zmalloc() already returns zeroed memory */
+               mbufs[i] = rte_zmalloc(NULL, sizeof(struct rte_mbuf), 0);
+               if (mbufs[i] == NULL) {
+                       fprintf(stderr, "Failed to allocate fake mbufs in CPU memory.\n");
+                       goto out;
+               }
+       }
+
+       /**
+        * Populate just the first item of  the list
+        */
+       ret = rte_gpu_comm_populate_list_pkts(&(comm_list[0]), mbufs, 10);
+       if (ret < 0) {
+               fprintf(stderr, "rte_gpu_comm_populate_list_pkts returned error %d\n", ret);
+               goto out;
+       }
+
+       /* The item is READY and not consumed yet: cleanup is expected to fail. */
+       ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
+       if (ret == 0) {
+               fprintf(stderr, "rte_gpu_comm_cleanup_list erroneously cleaned the list even if packets have not been consumed yet\n");
+               goto out;
+       }
+       fprintf(stderr, "rte_gpu_comm_cleanup_list correctly didn't clean up the packets because they have not been consumed yet\n");
+
+       /**
+        * Simulate a GPU tasks going through the packet list to consume
+        * mbufs packets and release them
+        */
+       ret = simulate_gpu_task(&(comm_list[0]), 10);
+       if (ret < 0) {
+               fprintf(stderr, "simulate_gpu_task returned error %d\n", ret);
+               goto out;
+       }
+
+       /**
+        * Packets have been consumed, now the communication item
+        * and the related mbufs can be all released
+        */
+       ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
+       if (ret < 0) {
+               fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret);
+               goto out;
+       }
+
+       ret = rte_gpu_comm_destroy_list(comm_list, num_comm_items);
+       /* Destroy was attempted: do not try it again on the exit path. */
+       comm_list = NULL;
+       if (ret < 0) {
+               fprintf(stderr, "rte_gpu_comm_destroy_list returned error %d\n", ret);
+               goto out;
+       }
+
+       printf("\nCommunication list test passed!\n");
+       test_result = 0;
+
+out:
+       /* Best-effort release of the list when the test bailed out early. */
+       if (comm_list != NULL)
+               rte_gpu_comm_destroy_list(comm_list, num_comm_items);
+
+       /* Entries never allocated are still NULL, which rte_free() ignores. */
+       for (i = 0; i < 10; i++)
+               rte_free(mbufs[i]);
+
+       return test_result;
+}
+
  int
  main(int argc, char **argv)
  {
@@ -263,6 +356,7 @@ main(int argc, char **argv)
          * Communication items test
          */
         create_update_comm_flag(gpu_id);
+       create_update_comm_list(gpu_id);
  
         /* clean up the EAL */
         rte_eal_cleanup();
diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst

index e0db627aed22b5c45eaeb3213f2fdd1527adb5c6..67c7f8e123bdb5c9b996e05aa0da4404f037bf62 100644 (file)
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@@ -86,3 +86,19 @@ that's waiting to receive a signal from the CPU
  to move forward with the execution.
  The communication flag allocates a CPU memory GPU-visible ``uint32_t`` flag
  that can be used by the CPU to communicate with a GPU task.
+
+Communication list
+~~~~~~~~~~~~~~~~~~
+
+By default, DPDK pulls free mbufs from a mempool to receive packets.
+Best practice, especially in a multithreaded application,
+is to not make any assumption on which mbufs will be used
+to receive the next bursts of packets.
+Considering an application with a GPU memory mempool
+attached to a receive queue having some task waiting on the GPU
+to receive a new burst of packets to be processed,
+there is the need to communicate from the CPU
+the list of mbuf payload addresses where received packets have been stored.
+The ``rte_gpu_comm_*()`` functions are responsible for creating a list of packets
+that can be populated with received mbuf payload addresses
+and communicated to the task running on the GPU.
diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst

index 78b29d9a255c51c7b2da7245b977c1d47fb71109..23d8591f4085765dcfa06d555b68220d013bfcc9 100644 (file)
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -105,7 +105,7 @@ New Features
  
    * Device information
    * Memory management
-  * Communication flag
+  * Communication flag & list
  
  * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
  
diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c

index 80c9a35176dcb37650c99f27b5024337d97962e2..2b174d8bd54bbeef8edbe275fa1614adc32d9c77 100644 (file)
--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@@ -736,3 +736,173 @@ rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
  
         return 0;
  }
+
+/*
+ * Allocate num_comm_items communication list items, register the array
+ * with the device, and give every item its own registered packet array
+ * plus an mbuf pointer array, both of RTE_GPU_COMM_LIST_PKTS_MAX entries.
+ * Every item starts in RTE_GPU_COMM_LIST_FREE state with no packets.
+ *
+ * Returns the list, or NULL with rte_errno set to EINVAL (no items
+ * requested), ENODEV (unknown device) or ENOMEM (allocation or
+ * registration failure). On failure everything allocated or registered
+ * so far is released again.
+ */
+struct rte_gpu_comm_list *
+rte_gpu_comm_create_list(uint16_t dev_id,
+               uint32_t num_comm_items)
+{
+       struct rte_gpu_comm_list *comm_list;
+       uint32_t idx_l;
+       int ret;
+       struct rte_gpu *dev;
+
+       if (num_comm_items == 0) {
+               rte_errno = EINVAL;
+               return NULL;
+       }
+
+       dev = gpu_get_by_id(dev_id);
+       if (dev == NULL) {
+               GPU_LOG(ERR, "create communication list for invalid device ID %d", dev_id);
+               rte_errno = ENODEV;
+               return NULL;
+       }
+
+       comm_list = rte_zmalloc(NULL,
+                       sizeof(struct rte_gpu_comm_list) * num_comm_items, 0);
+       if (comm_list == NULL) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+
+       ret = rte_gpu_mem_register(dev_id,
+                       sizeof(struct rte_gpu_comm_list) * num_comm_items, comm_list);
+       if (ret < 0)
+               goto free_list;
+
+       for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
+               /*
+                * Do both allocations before the registration, so that a
+                * failing item never owns a registered buffer.
+                */
+               comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
+                               sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
+               if (comm_list[idx_l].pkt_list == NULL)
+                       goto free_item;
+
+               comm_list[idx_l].mbufs = rte_zmalloc(NULL,
+                               sizeof(struct rte_mbuf *) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
+               if (comm_list[idx_l].mbufs == NULL)
+                       goto free_item;
+
+               ret = rte_gpu_mem_register(dev_id,
+                               sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX,
+                               comm_list[idx_l].pkt_list);
+               if (ret < 0)
+                       goto free_item;
+
+               RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
+               comm_list[idx_l].num_pkts = 0;
+               comm_list[idx_l].dev_id = dev_id;
+       }
+
+       return comm_list;
+
+free_item:
+       /*
+        * Item idx_l is only partially set up and its packet array is not
+        * registered; pointers not allocated yet are NULL (zeroed list),
+        * which rte_free() ignores.
+        */
+       rte_free(comm_list[idx_l].pkt_list);
+       rte_free(comm_list[idx_l].mbufs);
+
+       /* Items before idx_l are complete: unregister and free them. */
+       while (idx_l-- > 0) {
+               rte_gpu_mem_unregister(dev_id, comm_list[idx_l].pkt_list);
+               rte_free(comm_list[idx_l].pkt_list);
+               rte_free(comm_list[idx_l].mbufs);
+       }
+
+       rte_gpu_mem_unregister(dev_id, comm_list);
+free_list:
+       rte_free(comm_list);
+       /* Set last so that the unwinding calls cannot overwrite it. */
+       rte_errno = ENOMEM;
+       return NULL;
+}
+
+/*
+ * Unregister and free the per-item packet arrays, free the per-item mbuf
+ * pointer arrays, then unregister and free the list itself.
+ * The device ID is taken from the first item.
+ *
+ * Returns 0 on success, -rte_errno otherwise (EINVAL for a NULL list or
+ * when a memory unregistration fails). If an unregistration fails the
+ * list is left partially destroyed: pointers already freed are set to
+ * NULL so they are not left dangling.
+ */
+int
+rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
+               uint32_t num_comm_items)
+{
+       uint32_t idx_l;
+       int ret;
+       uint16_t dev_id;
+
+       if (comm_list == NULL) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+
+       dev_id = comm_list[0].dev_id;
+
+       for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
+               ret = rte_gpu_mem_unregister(dev_id, comm_list[idx_l].pkt_list);
+               if (ret < 0) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+
+               rte_free(comm_list[idx_l].pkt_list);
+               comm_list[idx_l].pkt_list = NULL;
+               rte_free(comm_list[idx_l].mbufs);
+               comm_list[idx_l].mbufs = NULL;
+       }
+
+       ret = rte_gpu_mem_unregister(dev_id, comm_list);
+       if (ret < 0) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+
+       rte_free(comm_list);
+
+       return 0;
+}
+
+/*
+ * Fill the first num_mbufs entries of the item with the data address and
+ * packet length of each mbuf, remember the mbuf pointers, then publish
+ * the packet count and switch the item to RTE_GPU_COMM_LIST_READY.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ *  - EINVAL for a NULL item, NULL item arrays, NULL mbuf array or entry,
+ *    or num_mbufs above RTE_GPU_COMM_LIST_PKTS_MAX
+ *  - ENOTSUP for a chained (multi-segment) mbuf
+ * On failure the status of the item is not changed, but entries before
+ * the offending mbuf may already have been written.
+ */
+int
+rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
+               struct rte_mbuf **mbufs, uint32_t num_mbufs)
+{
+       uint32_t idx;
+
+       if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
+                       comm_list_item->mbufs == NULL ||
+                       mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+
+       for (idx = 0; idx < num_mbufs; idx++) {
+               /* a NULL entry would be dereferenced by the checks below */
+               if (unlikely(mbufs[idx] == NULL)) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               /* support only unchained mbufs */
+               if (unlikely((mbufs[idx]->nb_segs > 1) ||
+                               (mbufs[idx]->next != NULL) ||
+                               (mbufs[idx]->data_len != mbufs[idx]->pkt_len))) {
+                       rte_errno = ENOTSUP;
+                       return -rte_errno;
+               }
+               comm_list_item->pkt_list[idx].addr =
+                               rte_pktmbuf_mtod_offset(mbufs[idx], uintptr_t, 0);
+               comm_list_item->pkt_list[idx].size = mbufs[idx]->pkt_len;
+               comm_list_item->mbufs[idx] = mbufs[idx];
+       }
+
+       /*
+        * The packet count is stored before the READY status, with
+        * rte_gpu_wmb() after each store; the intent is that the entries
+        * and the count are visible before the item is seen as READY.
+        */
+       RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
+       rte_gpu_wmb(comm_list_item->dev_id);
+       RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;
+       rte_gpu_wmb(comm_list_item->dev_id);
+
+       return 0;
+}
+
+/*
+ * Reset a communication list item: clear the used packet entries and the
+ * matching mbuf references, then set the status to
+ * RTE_GPU_COMM_LIST_FREE and the packet count to 0.
+ * The mbufs themselves are not freed here.
+ *
+ * Returns 0 on success, -rte_errno otherwise (EINVAL for a NULL item,
+ * NULL item arrays, or an item that is still in READY state).
+ */
+int
+rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
+{
+       uint32_t idx = 0;
+
+       if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
+                       comm_list_item->mbufs == NULL) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+
+       if (RTE_GPU_VOLATILE(comm_list_item->status) ==
+                       RTE_GPU_COMM_LIST_READY) {
+               GPU_LOG(ERR, "packet list is still in progress");
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+
+       /* Used entries are contiguous from 0: stop at the first zero address. */
+       for (idx = 0; idx < RTE_GPU_COMM_LIST_PKTS_MAX; idx++) {
+               if (comm_list_item->pkt_list[idx].addr == 0)
+                       break;
+
+               comm_list_item->pkt_list[idx].addr = 0;
+               comm_list_item->pkt_list[idx].size = 0;
+               comm_list_item->mbufs[idx] = NULL;
+       }
+
+       RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
+       RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
+       rte_mb();
+
+       return 0;
+}
diff --git a/lib/gpudev/meson.build b/lib/gpudev/meson.build

index 608154817b6449b2e999da15c2fc9c9210d98d15..89a118f3579633b4a00277dbb7d75efae2418fb1 100644 (file)
--- a/lib/gpudev/meson.build
+++ b/lib/gpudev/meson.build
@@ -8,3 +8,5 @@ headers = files(
  sources = files(
          'gpudev.c',
  )
+
+deps += ['mbuf']
diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h

index e539823deab6befd98d4ff9faa3349465ed33527..fa3f3aad4f944b47ada2317ab79e38aa1072d769 100644 (file)
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@@ -9,6 +9,7 @@
  #include <stdint.h>
  #include <stdbool.h>
  
+#include <rte_mbuf.h>
  #include <rte_bitops.h>
  #include <rte_compat.h>
  
@@ -41,6 +42,9 @@ extern "C" {
  /** Access variable as volatile. */
  #define RTE_GPU_VOLATILE(x) (*(volatile typeof(x) *)&(x))
  
+/** Max number of packets that a single communication list item can hold. */
+#define RTE_GPU_COMM_LIST_PKTS_MAX 1024
+
  /** Store device info. */
  struct rte_gpu_info {
         /** Unique identifier name. */
@@ -87,6 +91,43 @@ struct rte_gpu_comm_flag {
         enum rte_gpu_comm_flag_type mtype;
  };
  
+/** Descriptor of one packet shared among CPU and device. */
+struct rte_gpu_comm_pkt {
+       /**
+        * Address of the packet data in memory.
+        * rte_gpu_comm_populate_list_pkts() stores the start of the mbuf data
+        * here; rte_gpu_comm_cleanup_list() treats 0 as the first unused entry.
+        */
+       uintptr_t addr;
+       /** Size in bytes of the packet (the mbuf pkt_len when populated). */
+       size_t size;
+};
+
+/** Possible status for the list of packets shared among CPU and device. */
+enum rte_gpu_comm_list_status {
+       /** Packet list can be filled with new mbufs, no one is using it. */
+       RTE_GPU_COMM_LIST_FREE = 0,
+       /** Packet list has been filled with new mbufs and it's ready to be used. */
+       RTE_GPU_COMM_LIST_READY,
+       /** Packet list has been processed, it's ready to be freed. */
+       RTE_GPU_COMM_LIST_DONE,
+       /** Some error occurred during packet list processing. */
+       RTE_GPU_COMM_LIST_ERROR,
+};
+
+/**
+ * Item of a communication list: one list of packets
+ * with its own packet count and status flag.
+ * rte_gpu_comm_create_list() returns an array of these items.
+ */
+struct rte_gpu_comm_list {
+       /** Device that will use the communication list. */
+       uint16_t dev_id;
+       /**
+        * Pointers to the mbufs whose info has been stored in pkt_list,
+        * in the same order (RTE_GPU_COMM_LIST_PKTS_MAX entries).
+        */
+       struct rte_mbuf **mbufs;
+       /**
+        * List of packets populated by the CPU with a set of mbufs info
+        * (RTE_GPU_COMM_LIST_PKTS_MAX entries).
+        */
+       struct rte_gpu_comm_pkt *pkt_list;
+       /** Number of packets in the list. */
+       uint32_t num_pkts;
+       /** Status of the list. */
+       enum rte_gpu_comm_list_status status;
+};
+
  /**
   * @warning
   * @b EXPERIMENTAL: this API may change without prior notice.
@@ -513,6 +554,94 @@ __rte_experimental
  int rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag,
                 uint32_t *val);
  
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Create a communication list that can be used to share packets
+ * between CPU and device.
+ * Each element of the list contains:
+ *  - a packet list of RTE_GPU_COMM_LIST_PKTS_MAX elements
+ *  - number of packets in the list
+ *  - a status flag to communicate if the packet list is FREE,
+ *    READY to be processed, DONE with processing.
+ *
+ * The list is allocated in CPU-visible memory and registered with the device.
+ * At creation time, every list is in FREE state.
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param num_comm_items
+ *   Number of items in the communication list. Must be greater than 0.
+ *
+ * @return
+ *   A pointer to the allocated list, otherwise NULL and rte_errno is set:
+ *   - EINVAL if invalid input params
+ *   - ENODEV if invalid dev_id
+ *   - ENOMEM if memory allocation or registration failed
+ */
+__rte_experimental
+struct rte_gpu_comm_list *rte_gpu_comm_create_list(uint16_t dev_id,
+               uint32_t num_comm_items);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Destroy a communication list:
+ * the memory of every item and of the list itself
+ * is unregistered from the device and freed.
+ *
+ * @param comm_list
+ *   Communication list to be destroyed.
+ * @param num_comm_items
+ *   Number of items in the communication list.
+ *
+ * @return
+ *   0 on success, a negative value otherwise and rte_errno is set:
+ *   - EINVAL if invalid input params
+ */
+__rte_experimental
+int rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
+               uint32_t num_comm_items);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Populate the packets list of the communication item
+ * with info from a list of mbufs.
+ * Status flag of that packet list is set to READY.
+ *
+ * @param comm_list_item
+ *   Communication list item to fill.
+ * @param mbufs
+ *   List of mbufs.
+ * @param num_mbufs
+ *   Number of mbufs. Must not exceed RTE_GPU_COMM_LIST_PKTS_MAX.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ *   - ENOTSUP if mbufs are chained (multiple segments)
+ */
+__rte_experimental
+int rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
+               struct rte_mbuf **mbufs, uint32_t num_mbufs);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a communication list item to the original state.
+ * The packet entries and the mbuf references of the item are cleared,
+ * the number of packets is set to 0 and the status flag is set to FREE.
+ * The item cannot be reset while its status flag is READY.
+ *
+ * NOTE(review): the implementation only drops its references to the mbufs;
+ * it does not appear to free them, so the caller presumably remains
+ * responsible for returning them to the pool - to be confirmed.
+ *
+ * @param comm_list_item
+ *   Communication list item to reset.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params or if the item is still in READY state
+ */
+__rte_experimental
+int rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map

index 5b953d490ca70ae8ca6b5bc6846bf93d2566b7ad..2e414c65cc13abfead14010b673aeaba886da614 100644 (file)
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@@ -6,9 +6,13 @@ EXPERIMENTAL {
         rte_gpu_callback_register;
         rte_gpu_callback_unregister;
         rte_gpu_close;
+       rte_gpu_comm_cleanup_list;
         rte_gpu_comm_create_flag;
+       rte_gpu_comm_create_list;
         rte_gpu_comm_destroy_flag;
+       rte_gpu_comm_destroy_list;
         rte_gpu_comm_get_flag_value;
+       rte_gpu_comm_populate_list_pkts;
         rte_gpu_comm_set_flag;
         rte_gpu_count_avail;
         rte_gpu_find_next;
author	Elena Agostini <eagostini@nvidia.com>
	Mon, 8 Nov 2021 18:58:04 +0000 (18:58 +0000)
committer	Thomas Monjalon <thomas@monjalon.net>
	Mon, 8 Nov 2021 16:20:53 +0000 (17:20 +0100)
app/test-gpudev/main.c		patch \| blob \| history
doc/guides/prog_guide/gpudev.rst		patch \| blob \| history
doc/guides/rel_notes/release_21_11.rst		patch \| blob \| history
lib/gpudev/gpudev.c		patch \| blob \| history
lib/gpudev/meson.build		patch \| blob \| history
lib/gpudev/rte_gpudev.h		patch \| blob \| history
lib/gpudev/version.map		patch \| blob \| history