eal: add monitor wakeup function

author Anatoly Burakov <anatoly.burakov@intel.com>

Thu, 14 Jan 2021 14:46:07 +0000 (14:46 +0000)

committer Thomas Monjalon <thomas@monjalon.net>

Mon, 18 Jan 2021 22:59:42 +0000 (23:59 +0100)
author Anatoly Burakov <anatoly.burakov@intel.com>
Thu, 14 Jan 2021 14:46:07 +0000 (14:46 +0000)
committer Thomas Monjalon <thomas@monjalon.net>
Mon, 18 Jan 2021 22:59:42 +0000 (23:59 +0100)
diff --git a/lib/librte_eal/arm/rte_power_intrinsics.c b/lib/librte_eal/arm/rte_power_intrinsics.c

index 8d271dc0c12ac6022745af73c5e0a2100a44c229..e83f04072aae3d726646aae2e1dd4fd6bd52227d 100644 (file)
--- a/lib/librte_eal/arm/rte_power_intrinsics.c
+++ b/lib/librte_eal/arm/rte_power_intrinsics.c
@@ -27,3 +27,14 @@ rte_power_pause(const uint64_t tsc_timestamp)
  
         return -ENOTSUP;
  }
+
+/**
+ * This function is not supported on ARM.
+ *
+ * The lcore_id argument is ignored and -ENOTSUP is always returned.
+ */
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+       RTE_SET_USED(lcore_id);
+
+       return -ENOTSUP;
+}
diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h

index 85343bc9eb266160dcfdd4d98ba6ab19f22d0cba..6109d28faa06dfd5f9116c12c99b729c04b9638d 100644 (file)
--- a/lib/librte_eal/include/generic/rte_power_intrinsics.h
+++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h
@@ -62,6 +62,22 @@ __rte_experimental
  int rte_power_monitor(const struct rte_power_monitor_cond *pmc,
                 const uint64_t tsc_timestamp);
  
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wake up a specific lcore that is in a power optimized state and is monitoring
+ * an address.
+ *
+ * @note This function will *not* wake up a core that is in a power optimized
+ *   state due to calling `rte_power_pause`.
+ *
+ * @param lcore_id
+ *   Lcore ID of a sleeping thread.
+ *
+ * @return
+ *   0 on success, including when the lcore is not monitoring an address
+ *   -EINVAL if lcore_id is not lower than RTE_MAX_LCORE
+ *   -ENOTSUP if unsupported
+ */
+__rte_experimental
+int rte_power_monitor_wakeup(const unsigned int lcore_id);
+
  /**
   * @warning
   * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/librte_eal/ppc/rte_power_intrinsics.c b/lib/librte_eal/ppc/rte_power_intrinsics.c

index f7862ea324eec08e8ae24b84e9c1e343eeda51fa..7fc9586da7b06282a05ec01eca6fd26f31340683 100644 (file)
--- a/lib/librte_eal/ppc/rte_power_intrinsics.c
+++ b/lib/librte_eal/ppc/rte_power_intrinsics.c
@@ -27,3 +27,14 @@ rte_power_pause(const uint64_t tsc_timestamp)
  
         return -ENOTSUP;
  }
+
+/**
+ * This function is not supported on PPC64.
+ *
+ * The lcore_id argument is ignored and -ENOTSUP is always returned.
+ */
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+       RTE_SET_USED(lcore_id);
+
+       return -ENOTSUP;
+}
diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map

index 1fcd1d3bed5c4ec055b52f54e313866b4953c049..fce90a112f3b3912374b999112c0b38f436d43c7 100644 (file)
--- a/lib/librte_eal/version.map
+++ b/lib/librte_eal/version.map
@@ -406,6 +406,7 @@ EXPERIMENTAL {
  
         # added in 21.02
         rte_power_monitor;
+       rte_power_monitor_wakeup;
         rte_power_pause;
         rte_thread_tls_key_create;
         rte_thread_tls_key_delete;
diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c b/lib/librte_eal/x86/rte_power_intrinsics.c

index 29247d8638d4307089b31f5b23dfdbec9199b453..af3ae3237c1e93e25bb87b2f6d47f255d1ac1242 100644 (file)
--- a/lib/librte_eal/x86/rte_power_intrinsics.c
+++ b/lib/librte_eal/x86/rte_power_intrinsics.c
@@ -2,8 +2,31 @@
   * Copyright(c) 2020 Intel Corporation
   */
  
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+
  #include "rte_power_intrinsics.h"
  
+/*
+ * Per-lcore structure holding current status of C0.2 sleeps.
+ *
+ * The wait_status array holds one cache-aligned entry per possible lcore and
+ * is indexed by lcore ID. The lock is taken around every read and write of
+ * monitor_addr, both by the lcore that is about to sleep and by any thread
+ * trying to wake it up.
+ */
+static struct power_wait_status {
+       rte_spinlock_t lock;
+       volatile void *monitor_addr; /**< NULL if not currently sleeping */
+} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+
+static inline void
+__umwait_wakeup(volatile void *addr)
+{
+       uint64_t val;
+
+       /*
+        * trigger a write but don't change the value: read the current
+        * contents, then compare-exchange them with themselves, so the memory
+        * holds the same value afterwards whether or not the exchange
+        * succeeds.
+        *
+        * NOTE(review): the access is always 64 bits wide, while the
+        * monitored data has its own size (pmc->data_sz) - confirm that addr
+        * is always safe to access as a uint64_t.
+        */
+       val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
+       __atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
+                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+}
+
  static bool wait_supported;
  
  static inline uint64_t
@@ -51,17 +74,29 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
  {
         const uint32_t tsc_l = (uint32_t)tsc_timestamp;
         const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
+       const unsigned int lcore_id = rte_lcore_id();
+       struct power_wait_status *s;
  
         /* prevent user from running this instruction if it's not supported */
         if (!wait_supported)
                 return -ENOTSUP;
  
+       /* prevent non-EAL thread from using this API */
+       if (lcore_id >= RTE_MAX_LCORE)
+               return -EINVAL;
+
         if (pmc == NULL)
                 return -EINVAL;
  
         if (__check_val_size(pmc->data_sz) < 0)
                 return -EINVAL;
  
+       s = &wait_status[lcore_id];
+
+       /* update sleep address */
+       rte_spinlock_lock(&s->lock);
+       s->monitor_addr = pmc->addr;
+
         /*
          * we're using raw byte codes for now as only the newest compiler
          * versions support this instruction natively.
@@ -72,6 +107,10 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
                         :
                         : "D"(pmc->addr));
  
+       /* now that we've put this address into monitor, we can unlock */
+       rte_spinlock_unlock(&s->lock);
+
+       /* if we have a comparison mask, we might not need to sleep at all */
         if (pmc->mask) {
                 const uint64_t cur_value = __get_umwait_val(
                                 pmc->addr, pmc->data_sz);
@@ -79,14 +118,21 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
  
                 /* if the masked value is already matching, abort */
                 if (masked == pmc->val)
-                       return 0;
+                       goto end;
         }
+
         /* execute UMWAIT */
         asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
                         : /* ignore rflags */
                         : "D"(0), /* enter C0.2 */
                           "a"(tsc_l), "d"(tsc_h));
  
+end:
+       /* erase sleep address */
+       rte_spinlock_lock(&s->lock);
+       s->monitor_addr = NULL;
+       rte_spinlock_unlock(&s->lock);
+
         return 0;
  }
  
@@ -122,3 +168,48 @@ RTE_INIT(rte_power_intrinsics_init) {
         if (i.power_monitor && i.power_pause)
                 wait_supported = 1;
  }
+
+int
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+       struct power_wait_status *s;
+
+       /* prevent user from running this instruction if it's not supported */
+       if (!wait_supported)
+               return -ENOTSUP;
+
+       /* prevent buffer overrun */
+       if (lcore_id >= RTE_MAX_LCORE)
+               return -EINVAL;
+
+       s = &wait_status[lcore_id];
+
+       /*
+        * There is a race condition between sleep, wakeup and locking, but we
+        * don't need to handle it.
+        *
+        * In the scenarios below, T1 is the lcore calling rte_power_monitor()
+        * and T2 is the thread calling this function.
+        *
+        * Possible situations:
+        *
+        * 1. T1 locks, sets address, unlocks
+        * 2. T2 locks, triggers wakeup, unlocks
+        * 3. T1 sleeps
+        *
+        * In this case, because T1 has already set the address for monitoring,
+        * T1 will wake up immediately even if T2 triggers the wakeup before T1
+        * goes to sleep.
+        *
+        * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
+        * 2. T2 locks, triggers wakeup, and unlocks
+        * 3. T1 locks, erases address, and unlocks
+        *
+        * In this case, since T1 has already woken up, the "wakeup" was
+        * unneeded, and since T1 is still waiting on T2 releasing the lock, the
+        * wakeup address is still valid so it's perfectly safe to write it.
+        *
+        * If no address is recorded for this lcore (monitor_addr is NULL),
+        * nothing is written and 0 is still returned.
+        */
+       rte_spinlock_lock(&s->lock);
+       if (s->monitor_addr != NULL)
+               __umwait_wakeup(s->monitor_addr);
+       rte_spinlock_unlock(&s->lock);
+
+       return 0;
+}
author	Anatoly Burakov <anatoly.burakov@intel.com>
	Thu, 14 Jan 2021 14:46:07 +0000 (14:46 +0000)
committer	Thomas Monjalon <thomas@monjalon.net>
	Mon, 18 Jan 2021 22:59:42 +0000 (23:59 +0100)
lib/librte_eal/arm/rte_power_intrinsics.c		patch \| blob \| history
lib/librte_eal/include/generic/rte_power_intrinsics.h		patch \| blob \| history
lib/librte_eal/ppc/rte_power_intrinsics.c		patch \| blob \| history
lib/librte_eal/version.map		patch \| blob \| history
lib/librte_eal/x86/rte_power_intrinsics.c		patch \| blob \| history