*/
#define DSW_PARALLEL_FLOWS (1024)
+/* 'Background tasks' poll the control rings for migration-related
+ * messages and flush the output buffers (so that buffered events
+ * don't linger for too long). This value shouldn't be too low, since
+ * the system then won't benefit from the 'batching' effects of the
+ * output buffer, and it shouldn't be too high, since that would make
+ * buffered events linger for too long in case the port goes idle.
+ */
+#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
+
/* Avoid making small 'loans' from the central in-flight event credit
* pool, to improve efficiency.
*/
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
+#define DSW_MAX_LOAD (INT16_MAX)
+#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
+#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
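
To give a feel for the fixed-point load scale, here is a minimal, self-contained sketch; the 70% figure and the 'threshold' variable are only illustrative, and the macros are repeated so the snippet compiles on its own:

	#include <stdint.h>
	#include <stdio.h>

	#define DSW_MAX_LOAD (INT16_MAX)
	#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
	#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)

	int main(void)
	{
		/* A hypothetical 70% threshold on the [0, INT16_MAX] scale. */
		int16_t threshold = DSW_LOAD_FROM_PERCENT(70);

		/* Prints "22936 ~ 69%"; the round trip loses a little
		 * precision to integer truncation.
		 */
		printf("%d ~ %d%%\n", threshold, DSW_LOAD_TO_PERCENT(threshold));

		return 0;
	}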
+
+/* The thought behind keeping the load update interval shorter than
+ * the migration interval is that the load from newly migrated flows
+ * should 'show up' in the load measurement before new migrations are
+ * considered. This is to avoid migrating too many flows, from too
+ * many source ports, too quickly to a lightly loaded port - in
+ * particular since this might cause the system to oscillate.
+ */
+#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
+#define DSW_OLD_LOAD_WEIGHT (1)
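
To make the effect of DSW_OLD_LOAD_WEIGHT concrete, the sketch below repeats the smoothing step from dsw_port_load_update() (defined further down) on made-up numbers; the smooth() helper and the idle-to-busy scenario are only illustrative:

	#include <stdint.h>
	#include <stdio.h>

	#define DSW_MAX_LOAD (INT16_MAX)
	#define DSW_OLD_LOAD_WEIGHT (1)

	/* The same weighted average as in dsw_port_load_update(). */
	static int16_t
	smooth(int16_t old_load, int16_t period_load)
	{
		return (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
			(DSW_OLD_LOAD_WEIGHT+1);
	}

	int main(void)
	{
		/* An idle port (load 0) that suddenly becomes fully busy
		 * converges on the new level over a few update intervals:
		 * prints 16383, 24575, 28671.
		 */
		int16_t load = 0;
		int i;

		for (i = 0; i < 3; i++) {
			load = smooth(load, DSW_MAX_LOAD);
			printf("%d\n", load);
		}

		return 0;
	}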
+
+/* The minimum time (in us) between two flow migrations. */
+#define DSW_MIGRATION_INTERVAL (1000)
+
struct dsw_port {
uint16_t id;
uint16_t next_parallel_flow_id;
+	/* Number of enqueue/dequeue 'ops' since the last background task. */
+	uint16_t ops_since_bg_task;
+
+	/* Timestamp (rte timer cycles) of the last background task run. */
+	uint64_t last_bg;
+
+ /* For port load measurement. */
+ uint64_t next_load_update;
+ uint64_t load_update_interval;
+ uint64_t measurement_start;
+ uint64_t busy_start;
+ uint64_t busy_cycles;
+ uint64_t total_busy_cycles;
+
uint16_t out_buffer_len[DSW_MAX_PORTS];
struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
struct rte_event_ring *in_ring __rte_cache_aligned;
+
+ /* Estimate of current port load. */
+ rte_atomic16_t load __rte_cache_aligned;
} __rte_cache_aligned;
struct dsw_queue {
#include <stdbool.h>
#include <rte_atomic.h>
+#include <rte_cycles.h>
#include <rte_random.h>
static bool
}
}
+/* Track for how long (in rte timer cycles) the port has been busy,
+ * i.e., has had newly dequeued events to process.
+ */
+static void
+dsw_port_load_record(struct dsw_port *port, unsigned int dequeued)
+{
+ if (dequeued > 0 && port->busy_start == 0)
+ /* work period begins */
+ port->busy_start = rte_get_timer_cycles();
+ else if (dequeued == 0 && port->busy_start > 0) {
+ /* work period ends */
+ uint64_t work_period =
+ rte_get_timer_cycles() - port->busy_start;
+ port->busy_cycles += work_period;
+ port->busy_start = 0;
+ }
+}
+
+static int16_t
+dsw_port_load_close_period(struct dsw_port *port, uint64_t now)
+{
+ uint64_t passed = now - port->measurement_start;
+ uint64_t busy_cycles = port->busy_cycles;
+
+ if (port->busy_start > 0) {
+ busy_cycles += (now - port->busy_start);
+ port->busy_start = now;
+ }
+
+	/* Scale the busy share of this measurement period to the
+	 * [0, DSW_MAX_LOAD] fixed-point range.
+	 */
+	int16_t load = (DSW_MAX_LOAD * busy_cycles) / passed;
+
+ port->measurement_start = now;
+ port->busy_cycles = 0;
+
+ port->total_busy_cycles += busy_cycles;
+
+ return load;
+}
+
+static void
+dsw_port_load_update(struct dsw_port *port, uint64_t now)
+{
+ int16_t old_load;
+ int16_t period_load;
+ int16_t new_load;
+
+ old_load = rte_atomic16_read(&port->load);
+
+ period_load = dsw_port_load_close_period(port, now);
+
+ new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
+ (DSW_OLD_LOAD_WEIGHT+1);
+
+ rte_atomic16_set(&port->load, new_load);
+}
+
+static void
+dsw_port_consider_load_update(struct dsw_port *port, uint64_t now)
+{
+ if (now < port->next_load_update)
+ return;
+
+ port->next_load_update = now + port->load_update_interval;
+
+ dsw_port_load_update(port, now);
+}
+
static uint8_t
dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
{
dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, event);
}
+static void
+dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
+{
+	/* To poll the control ring reasonably often on busy ports,
+	 * each dequeued/enqueued event is considered an 'op' too.
+	 */
+ port->ops_since_bg_task += (num_events+1);
+}
+
+static void
+dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port);
+
+static void
+dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+ if (unlikely(port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK)) {
+ uint64_t now;
+
+ now = rte_get_timer_cycles();
+
+ port->last_bg = now;
+
+ /* Logic to avoid having events linger in the output
+ * buffer too long.
+ */
+ dsw_port_flush_out_buffers(dsw, port);
+
+ dsw_port_consider_load_update(port, now);
+
+ port->ops_since_bg_task = 0;
+ }
+}
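
As a rough, illustrative back-of-the-envelope for how the op counting paces the background task (the burst size of 32 is an assumption, not something the driver mandates): each enqueue/dequeue call adds its burst size plus one to the counter, so the 128-op threshold is reached after a handful of calls.

	#include <stdio.h>

	#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)

	int main(void)
	{
		/* Hypothetical steady-state burst size. */
		int burst = 32;
		/* dsw_port_note_op() adds num_events+1 ops per call. */
		int ops_per_call = burst + 1;
		/* Calls needed for the counter to reach the threshold;
		 * prints 4, so the background task runs at the start of
		 * roughly every fifth enqueue/dequeue call here.
		 */
		int calls = (DSW_MAX_PORT_OPS_PER_BG_TASK + ops_per_call - 1) /
			ops_per_call;

		printf("%d\n", calls);

		return 0;
	}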
+
static void
dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port)
{
DSW_LOG_DP_PORT(DEBUG, source_port->id, "Attempting to enqueue %d "
"events to port %d.\n", events_len, source_port->id);
+ dsw_port_bg_process(dsw, source_port);
+
/* XXX: For performance (=ring efficiency) reasons, the
* scheduler relies on internal non-ring buffers instead of
* immediately sending the event to the destination ring. For
* considered.
*/
if (unlikely(events_len == 0)) {
+ dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK);
dsw_port_flush_out_buffers(dsw, source_port);
return 0;
}
if (unlikely(events_len > source_port->enqueue_depth))
events_len = source_port->enqueue_depth;
+ dsw_port_note_op(source_port, events_len);
+
if (!op_types_known)
for (i = 0; i < events_len; i++) {
switch (events[i].op) {
source_port->pending_releases = 0;
+ dsw_port_bg_process(dsw, source_port);
+
if (unlikely(num > source_port->dequeue_depth))
num = source_port->dequeue_depth;
source_port->pending_releases = dequeued;
+ dsw_port_load_record(source_port, dequeued);
+
+ dsw_port_note_op(source_port, dequeued);
+
if (dequeued > 0) {
DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
dequeued);