diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
index f8e94e4a4e..dc28ab125d 100644
--- a/drivers/event/dsw/dsw_evdev.h
+++ b/drivers/event/dsw/dsw_evdev.h
@@ -36,6 +36,15 @@
  */
 #define DSW_PARALLEL_FLOWS (1024)
 
+/* 'Background tasks' include polling the control rings for
+ * migration-related messages and flushing the output buffers (so
+ * buffered events don't linger too long). This value shouldn't be
+ * too low, since the system then won't see the 'batching' benefits
+ * of the output buffer, and shouldn't be too high, since that will
+ * make buffered events linger too long in case the port goes idle.
+ */
+#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
+
 /* Avoid making small 'loans' from the central in-flight event credit
  * pool, to improve efficiency.
  */
@@ -50,6 +59,63 @@
  */
 #define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
 
+#define DSW_MAX_LOAD (INT16_MAX)
+#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
+#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
+
+/* The thought behind keeping the load update interval shorter than
+ * the migration interval is that the load from newly migrated flows
+ * should 'show up' in the load measurement before new migrations are
+ * considered. This is to avoid having too many flows, from too many
+ * source ports, migrate too quickly to a lightly loaded port - in
+ * particular since this might cause the system to oscillate.
+ */
+#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
+#define DSW_OLD_LOAD_WEIGHT (1)
+
+/* The minimum time (in us) between two flow migrations. What puts an
+ * upper limit on the actual migration rate is primarily the pace at
+ * which the ports send and receive control messages, which in turn is
+ * largely a function of how many cycles are spent processing an
+ * event burst.
+ */
+#define DSW_MIGRATION_INTERVAL (1000)
+#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
+#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
+
+#define DSW_MAX_EVENTS_RECORDED (128)
+
+/* Only one outstanding migration per port is allowed. */
+#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS)
+
+/* Enough room for pause requests/confirmations and unpause
+ * requests/confirmations from all possible senders.
+ */
+#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
+
+/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
+ * dequeue(), arrange events so that events with the same flow id on
+ * the same queue form a back-to-back "burst", and also so that such
+ * bursts of different flow ids, but on the same queue, come
+ * consecutively. All this is an attempt to improve data and
+ * instruction cache usage for the application, at the cost of an
+ * increase in scheduler overhead.
+ */
+
+/* #define DSW_SORT_DEQUEUED */
+
+struct dsw_queue_flow {
+	uint8_t queue_id;
+	uint16_t flow_hash;
+};
+
+enum dsw_migration_state {
+	DSW_MIGRATION_STATE_IDLE,
+	DSW_MIGRATION_STATE_PAUSING,
+	DSW_MIGRATION_STATE_FORWARDING,
+	DSW_MIGRATION_STATE_UNPAUSING
+};
+
 struct dsw_port {
 	uint16_t id;
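For reference, a minimal standalone sketch (not part of the patch) of how the fixed-point load scale above behaves: DSW_MAX_LOAD (INT16_MAX) represents a fully (100%) loaded port, so DSW_LOAD_FROM_PERCENT(70) evaluates to (70*32767)/100 = 22936, and DSW_LOAD_TO_PERCENT() maps that back to 69, both macros rounding down via integer division.

#include <stdint.h>
#include <stdio.h>

#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)

int
main(void)
{
	int16_t load = DSW_LOAD_FROM_PERCENT(70);

	/* Prints "load 22936 (69%)"; the percent lost to truncation
	 * is harmless for the migration threshold comparisons.
	 */
	printf("load %d (%d%%)\n", load, DSW_LOAD_TO_PERCENT(load));

	return 0;
}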
@@ -71,10 +137,69 @@ struct dsw_port {
 
 	uint16_t next_parallel_flow_id;
 
+	uint16_t ops_since_bg_task;
+
+	/* Timestamp of the most recent 'background' processing. */
+	uint64_t last_bg;
+
+	/* For port load measurement. */
+	uint64_t next_load_update;
+	uint64_t load_update_interval;
+	uint64_t measurement_start;
+	uint64_t busy_start;
+	uint64_t busy_cycles;
+	uint64_t total_busy_cycles;
+
+	/* For the ctl interface and flow migration mechanism. */
+	uint64_t next_migration;
+	uint64_t migration_interval;
+	enum dsw_migration_state migration_state;
+
+	uint64_t migration_start;
+	uint64_t migrations;
+	uint64_t migration_latency;
+
+	uint8_t migration_target_port_id;
+	struct dsw_queue_flow migration_target_qf;
+	uint8_t cfm_cnt;
+
+	uint16_t paused_flows_len;
+	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];
+
+	/* In a very contrived worst case, all inflight events can be
+	 * lying around paused here.
+	 */
+	uint16_t paused_events_len;
+	struct rte_event paused_events[DSW_MAX_EVENTS];
+
+	uint16_t seen_events_len;
+	uint16_t seen_events_idx;
+	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];
+
+	uint64_t new_enqueued;
+	uint64_t forward_enqueued;
+	uint64_t release_enqueued;
+	uint64_t queue_enqueued[DSW_MAX_QUEUES];
+
+	uint64_t dequeued;
+	uint64_t queue_dequeued[DSW_MAX_QUEUES];
+
 	uint16_t out_buffer_len[DSW_MAX_PORTS];
 	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
 
+	uint16_t in_buffer_len;
+	uint16_t in_buffer_start;
+	/* This buffer may contain events that were read from the
+	 * in_ring during the flow migration process.
+	 */
+	struct rte_event in_buffer[DSW_MAX_EVENTS];
+
 	struct rte_event_ring *in_ring __rte_cache_aligned;
+
+	struct rte_ring *ctl_in_ring __rte_cache_aligned;
+
+	/* Estimate of the current port load. */
+	rte_atomic16_t load __rte_cache_aligned;
 } __rte_cache_aligned;
 
 struct dsw_queue {
@@ -97,6 +222,20 @@ struct dsw_evdev {
 	rte_atomic32_t credits_on_loan __rte_cache_aligned;
 };
 
+#define DSW_CTL_PAUS_REQ (0)
+#define DSW_CTL_UNPAUS_REQ (1)
+#define DSW_CTL_CFM (2)
+
+/* sizeof(struct dsw_ctl_msg) must be less than or equal to
+ * sizeof(void *), to fit on the control ring.
+ */
+struct dsw_ctl_msg {
+	uint8_t type:2;
+	uint8_t originating_port_id:6;
+	uint8_t queue_id;
+	uint16_t flow_hash;
+} __rte_packed;
+
 uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
 
 uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
@@ -112,6 +251,17 @@ uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
 uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
 
+int dsw_xstats_get_names(const struct rte_eventdev *dev,
+			 enum rte_event_dev_xstats_mode mode,
+			 uint8_t queue_port_id,
+			 struct rte_event_dev_xstats_name *xstats_names,
+			 unsigned int *ids, unsigned int size);
+int dsw_xstats_get(const struct rte_eventdev *dev,
+		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
+		   const unsigned int ids[], uint64_t values[], unsigned int n);
+uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
+				const char *name, unsigned int *id);
+
 static inline struct dsw_evdev *
 dsw_pmd_priv(const struct rte_eventdev *eventdev)
 {
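As a footnote to the control-message definitions above, a minimal standalone sketch (not part of the patch) of how a struct dsw_ctl_msg can travel on the control ring: rte_ring slots hold void * entries, so a message no larger than a pointer can be passed by value, bit-copied into a slot. The helper names below (msg_to_slot(), slot_to_msg()) are hypothetical and for illustration only; __attribute__((packed)) stands in for __rte_packed, and the static assert encodes the size requirement stated in the header comment.

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct dsw_ctl_msg {
	uint8_t type:2;
	uint8_t originating_port_id:6;
	uint8_t queue_id;
	uint16_t flow_hash;
} __attribute__((packed));

/* 2+6 bits, 8 bits and 16 bits pack into 4 bytes, which fits in a
 * void * on both 32-bit and 64-bit targets.
 */
static_assert(sizeof(struct dsw_ctl_msg) <= sizeof(void *),
	      "ctl msg must fit in a ring slot");

static inline void *
msg_to_slot(struct dsw_ctl_msg msg)
{
	void *slot = NULL;

	/* memcpy() avoids strict-aliasing and alignment pitfalls. */
	memcpy(&slot, &msg, sizeof(msg));
	return slot;
}

static inline struct dsw_ctl_msg
slot_to_msg(void *slot)
{
	struct dsw_ctl_msg msg;

	memcpy(&msg, &slot, sizeof(msg));
	return msg;
}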