From 570ac17b8c3351dd751e40492de9b6469978a6bf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20R=C3=B6nnblom?= Date: Mon, 9 Mar 2020 07:51:03 +0100 Subject: [PATCH] event/dsw: avoid migration waves in large systems MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit DSW limits the rate of migrations on a per-port basis. Hence, as the number of cores grows, so does the total migration capacity. In high core-count systems, this allows for a situation where flows are migrated to a lightly loaded port which recently already received a number of new flows (from other ports). The processing load generated by these new flows may not yet be reflected in the lightly loaded port's load estimate. The result is that the previously lightly loaded port is now overloaded. This patch adds a rough estimate of the size of the inbound migrations to a particular port, which can be factored into the migration logic, avoiding the above problem. Signed-off-by: Mattias Rönnblom --- drivers/event/dsw/dsw_evdev.c | 1 + drivers/event/dsw/dsw_evdev.h | 2 ++ drivers/event/dsw/dsw_event.c | 18 ++++++++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c index 7798a38ad9..e796975dfc 100644 --- a/drivers/event/dsw/dsw_evdev.c +++ b/drivers/event/dsw/dsw_evdev.c @@ -62,6 +62,7 @@ dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id, port->ctl_in_ring = ctl_in_ring; rte_atomic16_init(&port->load); + rte_atomic32_init(&port->immigration_load); port->load_update_interval = (DSW_LOAD_UPDATE_INTERVAL * rte_get_timer_hz()) / US_PER_S; diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h index ced40ef8db..6cb77cfc44 100644 --- a/drivers/event/dsw/dsw_evdev.h +++ b/drivers/event/dsw/dsw_evdev.h @@ -220,6 +220,8 @@ struct dsw_port { /* Estimate of current port load. 
*/ rte_atomic16_t load __rte_cache_aligned; + /* Estimate of flows currently migrating to this port. */ + rte_atomic32_t immigration_load __rte_cache_aligned; } __rte_cache_aligned; struct dsw_queue { diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c index a8161fd76a..04f0e9cab6 100644 --- a/drivers/event/dsw/dsw_event.c +++ b/drivers/event/dsw/dsw_event.c @@ -160,6 +160,11 @@ dsw_port_load_update(struct dsw_port *port, uint64_t now) (DSW_OLD_LOAD_WEIGHT+1); rte_atomic16_set(&port->load, new_load); + + /* The load of the recently immigrated flows should hopefully + * be reflected in the load estimate by now. + */ + rte_atomic32_set(&port->immigration_load, 0); } static void @@ -362,7 +367,13 @@ dsw_retrieve_port_loads(struct dsw_evdev *dsw, int16_t *port_loads, uint16_t i; for (i = 0; i < dsw->num_ports; i++) { - int16_t load = rte_atomic16_read(&dsw->ports[i].load); + int16_t measured_load = rte_atomic16_read(&dsw->ports[i].load); + int32_t immigration_load = + rte_atomic32_read(&dsw->ports[i].immigration_load); + int32_t load = measured_load + immigration_load; + + load = RTE_MIN(load, DSW_MAX_LOAD); + if (load < load_limit) below_limit = true; port_loads[i] = load; @@ -491,6 +502,9 @@ dsw_select_emigration_target(struct dsw_evdev *dsw, target_qfs[*targets_len] = *candidate_qf; (*targets_len)++; + rte_atomic32_add(&dsw->ports[candidate_port_id].immigration_load, + candidate_flow_load); + return true; } @@ -503,7 +517,7 @@ dsw_select_emigration_targets(struct dsw_evdev *dsw, struct dsw_queue_flow *target_qfs = source_port->emigration_target_qfs; uint8_t *target_port_ids = source_port->emigration_target_port_ids; uint8_t *targets_len = &source_port->emigration_targets_len; - uint8_t i; + uint16_t i; for (i = 0; i < DSW_MAX_FLOWS_PER_MIGRATION; i++) { bool found; -- 2.20.1