]> git.droids-corp.org - dpdk.git/commitdiff
eal: use callbacks for power monitoring comparison
authorAnatoly Burakov <anatoly.burakov@intel.com>
Fri, 9 Jul 2021 16:08:10 +0000 (16:08 +0000)
committerDavid Marchand <david.marchand@redhat.com>
Fri, 9 Jul 2021 19:13:13 +0000 (21:13 +0200)
Previously, the semantics of power monitor were such that we were
checking current value against the expected value, and if they matched,
then the sleep was aborted. This is somewhat inflexible, because it only
allowed us to check for a specific value in a specific way.

This commit replaces the comparison with a user callback mechanism, so
that any PMD (or other code) using `rte_power_monitor()` can define
their own comparison semantics and decision making on how to detect the
need to abort the entering of power optimized state.

Existing implementations are adjusted to follow the new semantics.

Suggested-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Tested-by: David Hunt <david.hunt@intel.com>
Acked-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
doc/guides/rel_notes/release_21_08.rst
drivers/event/dlb2/dlb2.c
drivers/net/i40e/i40e_rxtx.c
drivers/net/iavf/iavf_rxtx.c
drivers/net/ice/ice_rxtx.c
drivers/net/ixgbe/ixgbe_rxtx.c
drivers/net/mlx5/mlx5_rx.c
lib/eal/include/generic/rte_power_intrinsics.h
lib/eal/x86/rte_power_intrinsics.c

index 476822b47f71da477f835c04bd451fe31efd6345..fa6cbae2030535c5c4daa869a283e9ec30dee86d 100644 (file)
@@ -144,6 +144,9 @@ API Changes
 * eal: ``rte_strscpy`` sets ``rte_errno`` to ``E2BIG`` in case of string
   truncation.
 
+* eal: ``rte_power_monitor`` and the ``rte_power_monitor_cond`` struct changed
+  to use a callback mechanism.
+
 
 ABI Changes
 -----------
index eca183753fa92548aa5a1216ec6cc897d8627563..252bbd8d5e2e88bd93bf94e608364a0f531f7418 100644 (file)
@@ -3154,6 +3154,16 @@ dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
        }
 }
 
+#define CLB_MASK_IDX 0
+#define CLB_VAL_IDX 1
+static int
+dlb2_monitor_callback(const uint64_t val,
+               const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+       /* abort if the value matches */
+       return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
+}
+
 static inline int
 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
                  struct dlb2_eventdev_port *ev_port,
@@ -3194,8 +3204,11 @@ dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
                        expected_value = 0;
 
                pmc.addr = monitor_addr;
-               pmc.val = expected_value;
-               pmc.mask = qe_mask.raw_qe[1];
+               /* store expected value and comparison mask in opaque data */
+               pmc.opaque[CLB_VAL_IDX] = expected_value;
+               pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
+               /* set up callback */
+               pmc.fn = dlb2_monitor_callback;
                pmc.size = sizeof(uint64_t);
 
                rte_power_monitor(&pmc, timeout + start_ticks);
index e518409fe598fbc38b4fdeb1852de5a168186607..8489f91f1dd9178e272f5bbec90fb2be7a7dba9f 100644 (file)
 #define I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK \
                (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_SIMPLE_SUP_MASK)
 
+static int
+i40e_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       const uint64_t m = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+       /*
+        * we expect the DD bit to be set to 1 if this descriptor was already
+        * written to.
+        */
+       return (value & m) == m ? -1 : 0;
+}
+
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -93,12 +105,8 @@ i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
        /* watch for changes in status bit */
        pmc->addr = &rxdp->wb.qword1.status_error_len;
 
-       /*
-        * we expect the DD bit to be set to 1 if this descriptor was already
-        * written to.
-        */
-       pmc->val = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
-       pmc->mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+       /* comparison callback */
+       pmc->fn = i40e_monitor_callback;
 
        /* registers are 64-bit */
        pmc->size = sizeof(uint64_t);
index f817fbc49b4ba2ac2f1351002e31f75faed07cf9..d61b32fcee7e2097110fed3244f29eef6bcdeabe 100644 (file)
@@ -57,6 +57,18 @@ iavf_proto_xtr_type_to_rxdid(uint8_t flex_type)
                                rxdid_map[flex_type] : IAVF_RXDID_COMMS_OVS_1;
 }
 
+static int
+iavf_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       const uint64_t m = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
+       /*
+        * we expect the DD bit to be set to 1 if this descriptor was already
+        * written to.
+        */
+       return (value & m) == m ? -1 : 0;
+}
+
 int
 iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -69,12 +81,8 @@ iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
        /* watch for changes in status bit */
        pmc->addr = &rxdp->wb.qword1.status_error_len;
 
-       /*
-        * we expect the DD bit to be set to 1 if this descriptor was already
-        * written to.
-        */
-       pmc->val = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
-       pmc->mask = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
+       /* comparison callback */
+       pmc->fn = iavf_monitor_callback;
 
        /* registers are 64-bit */
        pmc->size = sizeof(uint64_t);
index 3f6e7359844b8214627957520fcf6711836b5820..5d7ab4f047ee6730e5a0a51d06c03a0ca67d584c 100644 (file)
@@ -27,6 +27,18 @@ uint64_t rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask;
 uint64_t rte_net_ice_dynflag_proto_xtr_tcp_mask;
 uint64_t rte_net_ice_dynflag_proto_xtr_ip_offset_mask;
 
+static int
+ice_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       const uint64_t m = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S);
+       /*
+        * we expect the DD bit to be set to 1 if this descriptor was already
+        * written to.
+        */
+       return (value & m) == m ? -1 : 0;
+}
+
 int
 ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -39,12 +51,8 @@ ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
        /* watch for changes in status bit */
        pmc->addr = &rxdp->wb.status_error0;
 
-       /*
-        * we expect the DD bit to be set to 1 if this descriptor was already
-        * written to.
-        */
-       pmc->val = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S);
-       pmc->mask = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S);
+       /* comparison callback */
+       pmc->fn = ice_monitor_callback;
 
        /* register is 16-bit */
        pmc->size = sizeof(uint16_t);
index d69f36e9777095de9948e9c2ae50f33f1f2dde7c..c814a28cb49a0d9eed4f7630ee4544dd2eca993d 100644 (file)
@@ -1369,6 +1369,18 @@ const uint32_t
                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
 };
 
+static int
+ixgbe_monitor_callback(const uint64_t value,
+               const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+       const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
+       /*
+        * we expect the DD bit to be set to 1 if this descriptor was already
+        * written to.
+        */
+       return (value & m) == m ? -1 : 0;
+}
+
 int
 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -1381,12 +1393,8 @@ ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
        /* watch for changes in status bit */
        pmc->addr = &rxdp->wb.upper.status_error;
 
-       /*
-        * we expect the DD bit to be set to 1 if this descriptor was already
-        * written to.
-        */
-       pmc->val = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
-       pmc->mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
+       /* comparison callback */
+       pmc->fn = ixgbe_monitor_callback;
 
        /* the registers are 32-bit */
        pmc->size = sizeof(uint32_t);
index 777a1d6e451a623893186dd15f248497f077eada..8d47637892e48a2cd157c962e0928bf380030ae3 100644 (file)
@@ -269,6 +269,18 @@ mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        return rx_queue_count(rxq);
 }
 
+#define CLB_VAL_IDX 0
+#define CLB_MSK_IDX 1
+static int
+mlx5_monitor_callback(const uint64_t value,
+               const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+       const uint64_t m = opaque[CLB_MSK_IDX];
+       const uint64_t v = opaque[CLB_VAL_IDX];
+
+       return (value & m) == v ? -1 : 0;
+}
+
 int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
        struct mlx5_rxq_data *rxq = rx_queue;
@@ -282,8 +294,9 @@ int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
                return -rte_errno;
        }
        pmc->addr = &cqe->op_own;
-       pmc->val =  !!idx;
-       pmc->mask = MLX5_CQE_OWNER_MASK;
+       pmc->opaque[CLB_VAL_IDX] = !!idx;
+       pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK;
+       pmc->fn = mlx5_monitor_callback;
        pmc->size = sizeof(uint8_t);
        return 0;
 }
index dddca3d41ce1efbb970530d2f05896487982f3ae..f2b4f6a10b476a4d67871fc9115e10cba268c856 100644 (file)
  * which are architecture-dependent.
  */
 
+/** Size of the opaque data in monitor condition */
+#define RTE_POWER_MONITOR_OPAQUE_SZ 4
+
+/**
+ * Callback definition for monitoring conditions. Callbacks with this signature
+ * will be used by `rte_power_monitor()` to check if the entering of power
+ * optimized state should be aborted.
+ *
+ * @param val
+ *   The value read from memory.
+ * @param opaque
+ *   Callback-specific data.
+ *
+ * @return
+ *   0 if entering of power optimized state should proceed
+ *   -1 if entering of power optimized state should be aborted
+ */
+typedef int (*rte_power_monitor_clb_t)(const uint64_t val,
+               const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]);
+
 struct rte_power_monitor_cond {
        volatile void *addr;  /**< Address to monitor for changes */
-       uint64_t val;         /**< If the `mask` is non-zero, location pointed
-                              *   to by `addr` will be read and compared
-                              *   against this value.
-                              */
-       uint64_t mask;   /**< 64-bit mask to extract value read from `addr` */
-       uint8_t size;    /**< Data size (in bytes) that will be used to compare
-                         *   expected value (`val`) with data read from the
+       uint8_t size;    /**< Data size (in bytes) that will be read from the
                          *   monitored memory location (`addr`). Can be 1, 2,
                          *   4, or 8. Supplying any other value will result in
                          *   an error.
                          */
+       rte_power_monitor_clb_t fn; /**< Callback to be used to check if
+                                    *   entering power optimized state should
+                                    *   be aborted.
+                                    */
+       uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ];
+       /**< Callback-specific data */
 };
 
 /**
index 39ea9fdecdafe7ffe77391abb659c7a80691c637..66fea28897a9fd6009d5cf6fa34d94cd3aa76d5d 100644 (file)
@@ -76,6 +76,7 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
        const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
        const unsigned int lcore_id = rte_lcore_id();
        struct power_wait_status *s;
+       uint64_t cur_value;
 
        /* prevent user from running this instruction if it's not supported */
        if (!wait_supported)
@@ -91,6 +92,9 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
        if (__check_val_size(pmc->size) < 0)
                return -EINVAL;
 
+       if (pmc->fn == NULL)
+               return -EINVAL;
+
        s = &wait_status[lcore_id];
 
        /* update sleep address */
@@ -110,16 +114,11 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
        /* now that we've put this address into monitor, we can unlock */
        rte_spinlock_unlock(&s->lock);
 
-       /* if we have a comparison mask, we might not need to sleep at all */
-       if (pmc->mask) {
-               const uint64_t cur_value = __get_umwait_val(
-                               pmc->addr, pmc->size);
-               const uint64_t masked = cur_value & pmc->mask;
+       cur_value = __get_umwait_val(pmc->addr, pmc->size);
 
-               /* if the masked value is already matching, abort */
-               if (masked == pmc->val)
-                       goto end;
-       }
+       /* check if callback indicates we should abort */
+       if (pmc->fn(cur_value, pmc->opaque) != 0)
+               goto end;
 
        /* execute UMWAIT */
        asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"