event/dsw: reduce max flows to speed up load balancing

[dpdk.git] / drivers / event / dsw / dsw_evdev.h
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h

index a5399dda5e88ed687b6d937bd6bde73172b61349..dc44bce81ae8bfee0503103dc1a2ede08d8dbd90 100644 (file)
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -10,7 +10,6 @@
  
  #define DSW_PMD_NAME RTE_STR(event_dsw)
  
-/* Code changes are required to allow more ports. */
  #define DSW_MAX_PORTS (64)
  #define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
  #define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
@@ -20,8 +19,20 @@
  
  #define DSW_MAX_EVENTS (16384)
  
-/* Code changes are required to allow more flows than 32k. */
-#define DSW_MAX_FLOWS_BITS (15)
+/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
+ * number of DSW flows should be high enough to make it unlikely that
+ * flow ids of several large flows hash to the same DSW-level flow.
+ * Such collisions will limit parallelism and thus the number of cores
+ * that may be utilized. However, configuring a large number of DSW
+ * flows might potentially, depending on traffic and actual
+ * application flow id value range, result in each such DSW-level flow
+ * being very small. The effect of migrating such flows will be small,
+ * in terms of the amount of processing load redistributed. This will in turn
+ * reduce the load balancing speed, since flow migration rate has an
+ * upper limit. Code changes are required to allow > 32k DSW-level
+ * flows.
+ */
+#define DSW_MAX_FLOWS_BITS (13)
  #define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
  #define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
  
@@ -73,7 +84,48 @@
  #define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
  #define DSW_OLD_LOAD_WEIGHT (1)
  
+/* The minimum time (in us) between two flow migrations. What puts an
+ * upper limit on the actual migration rate is primarily the pace at
+ * which the ports send and receive control messages, which in turn is
+ * largely a function of how many cycles are spent on the processing of
+ * an event burst.
+ */
  #define DSW_MIGRATION_INTERVAL (1000)
+#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
+#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
+
+#define DSW_MAX_EVENTS_RECORDED (128)
+
+/* Only one outstanding migration per port is allowed */
+#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS)
+
+/* Enough room for pause request/confirm and unpause request/confirm for
+ * all possible senders.
+ */
+#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
+
+/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
+ * dequeue(), arrange events so that events with the same flow id on
+ * the same queue form a back-to-back "burst", and also so that such
+ * bursts of different flow ids, but on the same queue, also come
+ * consecutively. All this in an attempt to improve data and
+ * instruction cache usage for the application, at the cost of a
+ * scheduler overhead increase.
+ */
+
+/* #define DSW_SORT_DEQUEUED */
+
+struct dsw_queue_flow { /* element of paused_flows[] and seen_events[] */
+       uint8_t queue_id; /* queue half of the (queue, flow) pair */
+       uint16_t flow_hash; /* NOTE(review): presumably < DSW_MAX_FLOWS */
+};
+
+enum dsw_migration_state { /* type of dsw_port.migration_state */
+       DSW_MIGRATION_STATE_IDLE, /* first enumerator, so value 0 */
+       DSW_MIGRATION_STATE_PAUSING,
+       DSW_MIGRATION_STATE_FORWARDING,
+       DSW_MIGRATION_STATE_UNPAUSING
+};
  
  struct dsw_port {
         uint16_t id;
@@ -98,6 +150,7 @@ struct dsw_port {
  
         uint16_t ops_since_bg_task;
  
+       /* most recent 'background' processing */
         uint64_t last_bg;
  
         /* For port load measurement. */
@@ -108,11 +161,54 @@ struct dsw_port {
         uint64_t busy_cycles;
         uint64_t total_busy_cycles;
  
+       /* For the ctl interface and flow migration mechanism. */
+       uint64_t next_migration;
+       uint64_t migration_interval;
+       enum dsw_migration_state migration_state;
+
+       uint64_t migration_start;
+       uint64_t migrations;
+       uint64_t migration_latency;
+
+       uint8_t migration_target_port_id;
+       struct dsw_queue_flow migration_target_qf;
+       uint8_t cfm_cnt;
+
+       uint16_t paused_flows_len;
+       struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];
+
+       /* In a very contrived worst case all inflight events can be
+        * lying around paused here.
+        */
+       uint16_t paused_events_len;
+       struct rte_event paused_events[DSW_MAX_EVENTS];
+
+       uint16_t seen_events_len;
+       uint16_t seen_events_idx;
+       struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];
+
+       uint64_t new_enqueued;
+       uint64_t forward_enqueued;
+       uint64_t release_enqueued;
+       uint64_t queue_enqueued[DSW_MAX_QUEUES];
+
+       uint64_t dequeued;
+       uint64_t queue_dequeued[DSW_MAX_QUEUES];
+
         uint16_t out_buffer_len[DSW_MAX_PORTS];
         struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
  
+       uint16_t in_buffer_len;
+       uint16_t in_buffer_start;
+       /* This buffer may contain events that were read up from the
+        * in_ring during the flow migration process.
+        */
+       struct rte_event in_buffer[DSW_MAX_EVENTS];
+
         struct rte_event_ring *in_ring __rte_cache_aligned;
  
+       struct rte_ring *ctl_in_ring __rte_cache_aligned;
+
         /* Estimate of current port load. */
         rte_atomic16_t load __rte_cache_aligned;
  } __rte_cache_aligned;
@@ -137,6 +233,17 @@ struct dsw_evdev {
         rte_atomic32_t credits_on_loan __rte_cache_aligned;
  };
  
+#define DSW_CTL_PAUS_REQ (0)
+#define DSW_CTL_UNPAUS_REQ (1)
+#define DSW_CTL_CFM (2)
+
+struct dsw_ctl_msg { /* NOTE(review): presumably sent via ctl_in_ring */
+       uint8_t type; /* NOTE(review): presumably a DSW_CTL_* value */
+       uint8_t originating_port_id;
+       uint8_t queue_id; /* same field types as struct dsw_queue_flow */
+       uint16_t flow_hash;
+} __rte_aligned(4);
+
  uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
  uint16_t dsw_event_enqueue_burst(void *port,
                                  const struct rte_event events[],
@@ -152,6 +259,17 @@ uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
  uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
                                  uint16_t num, uint64_t wait);
  
+int dsw_xstats_get_names(const struct rte_eventdev *dev,
+                        enum rte_event_dev_xstats_mode mode,
+                        uint8_t queue_port_id,
+                        struct rte_event_dev_xstats_name *xstats_names,
+                        unsigned int *ids, unsigned int size);
+int dsw_xstats_get(const struct rte_eventdev *dev,
+                  enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
+                  const unsigned int ids[], uint64_t values[], unsigned int n);
+uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
+                               const char *name, unsigned int *id);
+
  static inline struct dsw_evdev *
  dsw_pmd_priv(const struct rte_eventdev *eventdev)
  {