From 68c1f26d4236cca0fc144a36cd5a5cc3f1b58bfe Mon Sep 17 00:00:00 2001 From: Jasvinder Singh Date: Wed, 16 Oct 2019 15:17:29 +0100 Subject: [PATCH] sched: support 64-bit values Modify internal structure and functions to support 64-bit values for rates and stats parameters. Signed-off-by: Jasvinder Singh Signed-off-by: Lukasz Krakowiak Acked-by: Cristian Dumitrescu --- lib/librte_sched/rte_approx.c | 153 ++++++++++++++++++++++++++++ lib/librte_sched/rte_approx.h | 18 ++++ lib/librte_sched/rte_sched.c | 125 ++++++++++++----------- lib/librte_sched/rte_sched_common.h | 6 -- 4 files changed, 237 insertions(+), 65 deletions(-) diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c index 30620b83d0..edc8d9113b 100644 --- a/lib/librte_sched/rte_approx.c +++ b/lib/librte_sched/rte_approx.c @@ -165,3 +165,156 @@ int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q) /* Perform approximation */ return find_best_rational_approximation(alpha_num, d_num, denum, p, q); } + +/* fraction comparison (64 bit version): compare (a/b) and (c/d) */ +static inline uint64_t +less_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return a*d < b*c; +} + +static inline uint64_t +less_or_equal_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + return a*d <= b*c; +} + +/* check whether a/b is a valid approximation (64 bit version) */ +static inline uint64_t +matches_64(uint64_t a, uint64_t b, + uint64_t alpha_num, uint64_t d_num, uint64_t denum) +{ + if (less_or_equal_64(a, b, alpha_num - d_num, denum)) + return 0; + + if (less_64(a, b, alpha_num + d_num, denum)) + return 1; + + return 0; +} + +static inline void +find_exact_solution_left_64(uint64_t p_a, uint64_t q_a, uint64_t p_b, uint64_t q_b, + uint64_t alpha_num, uint64_t d_num, uint64_t denum, uint64_t *p, uint64_t *q) +{ + uint64_t k_num = denum * p_b - (alpha_num + d_num) * q_b; + uint64_t k_denum = (alpha_num + d_num) * q_a - denum * p_a; + uint64_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static inline void +find_exact_solution_right_64(uint64_t p_a, uint64_t q_a, uint64_t p_b, uint64_t q_b, + uint64_t alpha_num, uint64_t d_num, uint64_t denum, uint64_t *p, uint64_t *q) +{ + uint64_t k_num = -denum * p_b + (alpha_num - d_num) * q_b; + uint64_t k_denum = -(alpha_num - d_num) * q_a + denum * p_a; + uint64_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static int +find_best_rational_approximation_64(uint64_t alpha_num, uint64_t d_num, + uint64_t denum, uint64_t *p, uint64_t *q) +{ + uint64_t p_a, q_a, p_b, q_b; + + /* check assumptions on the inputs */ + if (!((d_num > 0) && (d_num < alpha_num) && + (alpha_num < denum) && (d_num + alpha_num < denum))) { + return -1; + } + + /* set initial bounds for the search */ + p_a = 0; + q_a = 1; + p_b = 1; + q_b = 1; + + while (1) { + uint64_t new_p_a, new_q_a, new_p_b, new_q_b; + uint64_t x_num, x_denum, x; + int aa, bb; + + /* compute the number of steps to the left */ + x_num = denum * p_b - alpha_num * q_b; + x_denum = -denum * p_a + alpha_num * q_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches_64(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches_64(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, + alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_left_64(p_a, q_a, p_b, q_b, + alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a; + new_q_b = q_b + x * q_a; + + p_a = new_p_a; + q_a = new_q_a; + p_b = new_p_b; + q_b = new_q_b; + + /* compute the number of steps to the right */ + x_num = alpha_num * q_b - denum * p_b; + x_denum = -alpha_num * q_a + denum * p_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches_64(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches_64(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, + alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_right_64(p_a, q_a, p_b, q_b, + alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a; + new_q_b = q_b + x * q_a; + + p_a = new_p_a; + q_a = new_q_a; + p_b = new_p_b; + q_b = new_q_b; + } +} + +int rte_approx_64(double alpha, double d, uint64_t *p, uint64_t *q) +{ + uint64_t alpha_num, d_num, denum; + + /* Check input arguments */ + if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) + return -1; + + if ((p == NULL) || (q == NULL)) + return -2; + + /* Compute alpha_num, d_num and denum */ + denum = 1; + while (d < 1) { + alpha *= 10; + d *= 10; + denum *= 10; + } + alpha_num = (uint64_t) alpha; + d_num = (uint64_t) d; + + /* Perform approximation */ + return find_best_rational_approximation_64(alpha_num, d_num, denum, p, q); +} diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h index 0244d98f1f..74d55f457b 100644 --- a/lib/librte_sched/rte_approx.h +++ b/lib/librte_sched/rte_approx.h @@ -39,6 +39,24 @@ extern "C" { */ int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q); +/** + * Find best rational approximation (64 bit version) + * + * @param alpha + * Rational number to approximate + * @param d + * Precision for the rational approximation + * @param p + * Pointer to pre-allocated space where the numerator of the rational + * approximation will be stored when operation is successful + * @param q + * Pointer to pre-allocated space where the denominator of the rational + * approximation will be stored when operation is successful + * @return + * 0 upon success, error code otherwise + */ +int rte_approx_64(double alpha, double d, uint64_t *p, uint64_t *q); + #ifdef __cplusplus } #endif diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 710ecf65a8..c0983ddda4 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -49,13 +49,13 @@ struct rte_sched_pipe_profile { /* Token bucket (TB) */ - uint32_t tb_period; - uint32_t tb_credits_per_period; - uint32_t tb_size; + uint64_t tb_period; + uint64_t tb_credits_per_period; + uint64_t tb_size; /* Pipe traffic classes */ - uint32_t tc_period; - uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_period; + uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; uint8_t tc_ov_weight; /* Pipe best-effort traffic class queues */ @@ -65,20 +65,20 @@ struct rte_sched_pipe_profile { struct rte_sched_pipe { /* Token bucket (TB) */ uint64_t tb_time; /* time of last update */ - uint32_t tb_credits; + uint64_t tb_credits; /* Pipe profile and flags */ uint32_t profile; /* Traffic classes (TCs) */ uint64_t tc_time; /* time of next update */ - uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* Weighted Round Robin (WRR) */ uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* TC oversubscription */ - uint32_t tc_ov_credits; + uint64_t tc_ov_credits; uint8_t tc_ov_period_id; } __rte_cache_aligned; @@ -141,28 +141,28 @@ struct rte_sched_grinder { struct rte_sched_subport { /* Token bucket (TB) */ uint64_t tb_time; /* time of last update */ - uint32_t tb_period; - uint32_t tb_credits_per_period; - uint32_t tb_size; - uint32_t tb_credits; + uint64_t tb_period; + uint64_t tb_credits_per_period; + uint64_t tb_size; + uint64_t tb_credits; /* Traffic classes (TCs) */ uint64_t tc_time; /* time of next update */ - uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_period; + uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_period; /* TC oversubscription */ - uint32_t tc_ov_wm; - uint32_t tc_ov_wm_min; - uint32_t tc_ov_wm_max; + uint64_t tc_ov_wm; + uint64_t tc_ov_wm_min; + uint64_t tc_ov_wm_max; uint8_t tc_ov_period_id; uint8_t tc_ov; uint32_t tc_ov_n; double tc_ov_rate; /* Statistics */ - struct rte_sched_subport_stats stats; + struct rte_sched_subport_stats stats __rte_cache_aligned; /* Subport pipes */ uint32_t n_pipes_per_subport_enabled; @@ -170,7 +170,7 @@ struct rte_sched_subport { uint32_t n_max_pipe_profiles; /* Pipe best-effort TC rate */ - uint32_t pipe_tc_be_rate_max; + uint64_t pipe_tc_be_rate_max; /* Pipe queues size */ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; @@ -212,7 +212,7 @@ struct rte_sched_port { uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE]; uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE]; - uint32_t rate; + uint64_t rate; uint32_t mtu; uint32_t frame_overhead; int socket; @@ -517,9 +517,11 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i) struct rte_sched_pipe_profile *p = subport->pipe_profiles + i; RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n" - " Token bucket: period = %u, credits per period = %u, size = %u\n" - " Traffic classes: period = %u,\n" - " credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n" + " Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n" + " Traffic classes: period = %"PRIu64",\n" + " credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64"]\n" " Best-effort traffic class oversubscription: weight = %hhu\n" " WRR cost: [%hhu, %hhu, %hhu, %hhu]\n", i, @@ -553,7 +555,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i) } static inline uint64_t -rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) +rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate) { uint64_t time = time_ms; @@ -566,7 +568,7 @@ static void rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, struct rte_sched_pipe_params *src, struct rte_sched_pipe_profile *dst, - uint32_t rate) + uint64_t rate) { uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; uint32_t lcd1, lcd2, lcd; @@ -581,8 +583,8 @@ rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, / (double) rate; double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - rte_approx(tb_rate, d, - &dst->tb_credits_per_period, &dst->tb_period); + rte_approx_64(tb_rate, d, &dst->tb_credits_per_period, + &dst->tb_period); } dst->tb_size = src->tb_size; @@ -637,7 +639,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, subport->pipe_tc_be_rate_max = 0; for (i = 0; i < subport->n_pipe_profiles; i++) { struct rte_sched_pipe_params *src = params->pipe_profiles + i; - uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; + uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate) subport->pipe_tc_be_rate_max = pipe_tc_be_rate; @@ -647,7 +649,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, static int rte_sched_subport_check_params(struct rte_sched_subport_params *params, uint32_t n_max_pipes_per_subport, - uint32_t rate) + uint64_t rate) { uint32_t i; @@ -684,7 +686,7 @@ rte_sched_subport_check_params(struct rte_sched_subport_params *params, } for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { - uint32_t tc_rate = params->tc_rate[i]; + uint64_t tc_rate = params->tc_rate[i]; uint16_t qsize = params->qsize[i]; if ((qsize == 0 && tc_rate != 0) || @@ -910,10 +912,14 @@ rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) struct rte_sched_subport *s = port->subports[i]; RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n" - " Token bucket: period = %u, credits per period = %u, size = %u\n" - " Traffic classes: period = %u\n" - " credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n" - " Best effort traffic class oversubscription: wm min = %u, wm max = %u\n", + " Token bucket: period = %"PRIu64", credits per period = %"PRIu64 + ", size = %"PRIu64"\n" + " Traffic classes: period = %"PRIu64"\n" + " credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64"]\n" + " Best effort traffic class oversubscription: wm min = %"PRIu64 + ", wm max = %"PRIu64"\n", i, /* Token bucket */ @@ -1023,7 +1029,7 @@ rte_sched_subport_config(struct rte_sched_port *port, double tb_rate = ((double) params->tb_rate) / ((double) port->rate); double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); + rte_approx_64(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); } s->tb_size = params->tb_size; @@ -1970,13 +1976,13 @@ grinder_credits_update(struct rte_sched_port *port, /* Subport TB */ n_periods = (port->time - subport->tb_time) / subport->tb_period; subport->tb_credits += n_periods * subport->tb_credits_per_period; - subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size); subport->tb_time += n_periods * subport->tb_period; /* Pipe TB */ n_periods = (port->time - pipe->tb_time) / params->tb_period; pipe->tb_credits += n_periods * params->tb_credits_per_period; - pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size); pipe->tb_time += n_periods * params->tb_period; /* Subport TCs */ @@ -1998,13 +2004,13 @@ grinder_credits_update(struct rte_sched_port *port, #else -static inline uint32_t +static inline uint64_t grinder_tc_ov_credits_update(struct rte_sched_port *port, struct rte_sched_subport *subport) { - uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_consumption = 0, tc_ov_consumption_max; - uint32_t tc_ov_wm = subport->tc_ov_wm; + uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_consumption = 0, tc_ov_consumption_max; + uint64_t tc_ov_wm = subport->tc_ov_wm; uint32_t i; if (subport->tc_ov == 0) @@ -2053,13 +2059,13 @@ grinder_credits_update(struct rte_sched_port *port, /* Subport TB */ n_periods = (port->time - subport->tb_time) / subport->tb_period; subport->tb_credits += n_periods * subport->tb_credits_per_period; - subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size); subport->tb_time += n_periods * subport->tb_period; /* Pipe TB */ n_periods = (port->time - pipe->tb_time) / params->tb_period; pipe->tb_credits += n_periods * params->tb_credits_per_period; - pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size); pipe->tb_time += n_periods * params->tb_period; /* Subport TCs */ @@ -2101,11 +2107,11 @@ grinder_credits_check(struct rte_sched_port *port, struct rte_sched_pipe *pipe = grinder->pipe; struct rte_mbuf *pkt = grinder->pkt; uint32_t tc_index = grinder->tc_index; - uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; - uint32_t subport_tb_credits = subport->tb_credits; - uint32_t subport_tc_credits = subport->tc_credits[tc_index]; - uint32_t pipe_tb_credits = pipe->tb_credits; - uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint64_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint64_t subport_tb_credits = subport->tb_credits; + uint64_t subport_tc_credits = subport->tc_credits[tc_index]; + uint64_t pipe_tb_credits = pipe->tb_credits; + uint64_t pipe_tc_credits = pipe->tc_credits[tc_index]; int enough_credits; /* Check queue credits */ @@ -2136,21 +2142,22 @@ grinder_credits_check(struct rte_sched_port *port, struct rte_sched_pipe *pipe = grinder->pipe; struct rte_mbuf *pkt = grinder->pkt; uint32_t tc_index = grinder->tc_index; - uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; - uint32_t subport_tb_credits = subport->tb_credits; - uint32_t subport_tc_credits = subport->tc_credits[tc_index]; - uint32_t pipe_tb_credits = pipe->tb_credits; - uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; - uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0}; - uint32_t pipe_tc_ov_credits, i; + uint64_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint64_t subport_tb_credits = subport->tb_credits; + uint64_t subport_tc_credits = subport->tc_credits[tc_index]; + uint64_t pipe_tb_credits = pipe->tb_credits; + uint64_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0}; + uint64_t pipe_tc_ov_credits; + uint32_t i; int enough_credits; for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - pipe_tc_ov_mask1[i] = UINT32_MAX; + pipe_tc_ov_mask1[i] = ~0LLU; pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits; - pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX; + pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU; pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index]; /* Check pipe and subport credits */ diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h index 8c191a9b83..b58282de88 100644 --- a/lib/librte_sched/rte_sched_common.h +++ b/lib/librte_sched/rte_sched_common.h @@ -14,12 +14,6 @@ extern "C" { #define __rte_aligned_16 __attribute__((__aligned__(16))) -static inline uint32_t -rte_sched_min_val_2_u32(uint32_t x, uint32_t y) -{ - return (x < y)? x : y; -} - #if 0 static inline uint32_t rte_min_pos_4_u16(uint16_t *x) -- 2.20.1