sched: support 64-bit values
authorJasvinder Singh <jasvinder.singh@intel.com>
Wed, 16 Oct 2019 14:17:29 +0000 (15:17 +0100)
committerThomas Monjalon <thomas@monjalon.net>
Fri, 25 Oct 2019 16:07:37 +0000 (18:07 +0200)
Modify internal structure and functions to support 64-bit
values for rates and stats parameters.

Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
Signed-off-by: Lukasz Krakowiak <lukaszx.krakowiak@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
lib/librte_sched/rte_approx.c
lib/librte_sched/rte_approx.h
lib/librte_sched/rte_sched.c
lib/librte_sched/rte_sched_common.h

index 30620b8..edc8d91 100644 (file)
@@ -165,3 +165,156 @@ int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q)
        /* Perform approximation */
        return find_best_rational_approximation(alpha_num, d_num, denum, p, q);
 }
+
+/* fraction comparison (64 bit version): compare (a/b) and (c/d) */
+static inline uint64_t
+less_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+       return a*d < b*c;
+}
+
+static inline uint64_t
+less_or_equal_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+       return a*d <= b*c;
+}
+
+/* check whether a/b is a valid approximation (64 bit version) */
+static inline uint64_t
+matches_64(uint64_t a, uint64_t b,
+       uint64_t alpha_num, uint64_t d_num, uint64_t denum)
+{
+       if (less_or_equal_64(a, b, alpha_num - d_num, denum))
+               return 0;
+
+       if (less_64(a, b, alpha_num + d_num, denum))
+               return 1;
+
+       return 0;
+}
+
+static inline void
+find_exact_solution_left_64(uint64_t p_a, uint64_t q_a, uint64_t p_b, uint64_t q_b,
+       uint64_t alpha_num, uint64_t d_num, uint64_t denum, uint64_t *p, uint64_t *q)
+{
+       uint64_t k_num = denum * p_b - (alpha_num + d_num) * q_b;
+       uint64_t k_denum = (alpha_num + d_num) * q_a - denum * p_a;
+       uint64_t k = (k_num / k_denum) + 1;
+
+       *p = p_b + k * p_a;
+       *q = q_b + k * q_a;
+}
+
+static inline void
+find_exact_solution_right_64(uint64_t p_a, uint64_t q_a, uint64_t p_b, uint64_t q_b,
+       uint64_t alpha_num, uint64_t d_num, uint64_t denum, uint64_t *p, uint64_t *q)
+{
+       uint64_t k_num = -denum * p_b + (alpha_num - d_num) * q_b;
+       uint64_t k_denum = -(alpha_num - d_num) * q_a + denum * p_a;
+       uint64_t k = (k_num / k_denum) + 1;
+
+       *p = p_b + k * p_a;
+       *q = q_b + k * q_a;
+}
+
+static int
+find_best_rational_approximation_64(uint64_t alpha_num, uint64_t d_num,
+       uint64_t denum, uint64_t *p, uint64_t *q)
+{
+       uint64_t p_a, q_a, p_b, q_b;
+
+       /* check assumptions on the inputs */
+       if (!((d_num > 0) && (d_num < alpha_num) &&
+               (alpha_num < denum) && (d_num + alpha_num < denum))) {
+               return -1;
+       }
+
+       /* set initial bounds for the search */
+       p_a = 0;
+       q_a = 1;
+       p_b = 1;
+       q_b = 1;
+
+       while (1) {
+               uint64_t new_p_a, new_q_a, new_p_b, new_q_b;
+               uint64_t x_num, x_denum, x;
+               int aa, bb;
+
+               /* compute the number of steps to the left */
+               x_num = denum * p_b - alpha_num * q_b;
+               x_denum = -denum * p_a + alpha_num * q_a;
+               x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+               /* check whether we have a valid approximation */
+               aa = matches_64(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+               bb = matches_64(p_b + (x-1) * p_a, q_b + (x - 1) * q_a,
+                       alpha_num, d_num, denum);
+               if (aa || bb) {
+                       find_exact_solution_left_64(p_a, q_a, p_b, q_b,
+                               alpha_num, d_num, denum, p, q);
+                       return 0;
+               }
+
+               /* update the interval */
+               new_p_a = p_b + (x - 1) * p_a;
+               new_q_a = q_b + (x - 1) * q_a;
+               new_p_b = p_b + x * p_a;
+               new_q_b = q_b + x * q_a;
+
+               p_a = new_p_a;
+               q_a = new_q_a;
+               p_b = new_p_b;
+               q_b = new_q_b;
+
+               /* compute the number of steps to the right */
+               x_num = alpha_num * q_b - denum * p_b;
+               x_denum = -alpha_num * q_a + denum * p_a;
+               x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+               /* check whether we have a valid approximation */
+               aa = matches_64(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+               bb = matches_64(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a,
+                       alpha_num, d_num, denum);
+               if (aa || bb) {
+                       find_exact_solution_right_64(p_a, q_a, p_b, q_b,
+                               alpha_num, d_num, denum, p, q);
+                       return 0;
+               }
+
+               /* update the interval */
+               new_p_a = p_b + (x - 1) * p_a;
+               new_q_a = q_b + (x - 1) * q_a;
+               new_p_b = p_b + x * p_a;
+               new_q_b = q_b + x * q_a;
+
+               p_a = new_p_a;
+               q_a = new_q_a;
+               p_b = new_p_b;
+               q_b = new_q_b;
+       }
+}
+
+int rte_approx_64(double alpha, double d, uint64_t *p, uint64_t *q)
+{
+       uint64_t alpha_num, d_num, denum;
+
+       /* Check input arguments */
+       if (!((0.0 < d) && (d < alpha) && (alpha < 1.0)))
+               return -1;
+
+       if ((p == NULL) || (q == NULL))
+               return -2;
+
+       /* Compute alpha_num, d_num and denum */
+       denum = 1;
+       while (d < 1) {
+               alpha *= 10;
+               d *= 10;
+               denum *= 10;
+       }
+       alpha_num = (uint64_t) alpha;
+       d_num = (uint64_t) d;
+
+       /* Perform approximation */
+       return find_best_rational_approximation_64(alpha_num, d_num, denum, p, q);
+}
index 0244d98..74d55f4 100644 (file)
@@ -39,6 +39,24 @@ extern "C" {
  */
 int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q);
 
+/**
+ * Find best rational approximation (64 bit version)
+ *
+ * @param alpha
+ *   Rational number to approximate
+ * @param d
+ *   Precision for the rational approximation
+ * @param p
+ *   Pointer to pre-allocated space where the numerator of the rational
+ *   approximation will be stored when operation is successful
+ * @param q
+ *   Pointer to pre-allocated space where the denominator of the rational
+ *   approximation will be stored when operation is successful
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int rte_approx_64(double alpha, double d, uint64_t *p, uint64_t *q);
+
 #ifdef __cplusplus
 }
 #endif
index 710ecf6..c0983dd 100644 (file)
 
 struct rte_sched_pipe_profile {
        /* Token bucket (TB) */
-       uint32_t tb_period;
-       uint32_t tb_credits_per_period;
-       uint32_t tb_size;
+       uint64_t tb_period;
+       uint64_t tb_credits_per_period;
+       uint64_t tb_size;
 
        /* Pipe traffic classes */
-       uint32_t tc_period;
-       uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t tc_period;
+       uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t tc_ov_weight;
 
        /* Pipe best-effort traffic class queues */
@@ -65,20 +65,20 @@ struct rte_sched_pipe_profile {
 struct rte_sched_pipe {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
-       uint32_t tb_credits;
+       uint64_t tb_credits;
 
        /* Pipe profile and flags */
        uint32_t profile;
 
        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
-       uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
        /* Weighted Round Robin (WRR) */
        uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
 
        /* TC oversubscription */
-       uint32_t tc_ov_credits;
+       uint64_t tc_ov_credits;
        uint8_t tc_ov_period_id;
 } __rte_cache_aligned;
 
@@ -141,28 +141,28 @@ struct rte_sched_grinder {
 struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
-       uint32_t tb_period;
-       uint32_t tb_credits_per_period;
-       uint32_t tb_size;
-       uint32_t tb_credits;
+       uint64_t tb_period;
+       uint64_t tb_credits_per_period;
+       uint64_t tb_size;
+       uint64_t tb_credits;
 
        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
-       uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-       uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-       uint32_t tc_period;
+       uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t tc_period;
 
        /* TC oversubscription */
-       uint32_t tc_ov_wm;
-       uint32_t tc_ov_wm_min;
-       uint32_t tc_ov_wm_max;
+       uint64_t tc_ov_wm;
+       uint64_t tc_ov_wm_min;
+       uint64_t tc_ov_wm_max;
        uint8_t tc_ov_period_id;
        uint8_t tc_ov;
        uint32_t tc_ov_n;
        double tc_ov_rate;
 
        /* Statistics */
-       struct rte_sched_subport_stats stats;
+       struct rte_sched_subport_stats stats __rte_cache_aligned;
 
        /* Subport pipes */
        uint32_t n_pipes_per_subport_enabled;
@@ -170,7 +170,7 @@ struct rte_sched_subport {
        uint32_t n_max_pipe_profiles;
 
        /* Pipe best-effort TC rate */
-       uint32_t pipe_tc_be_rate_max;
+       uint64_t pipe_tc_be_rate_max;
 
        /* Pipe queues size */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
@@ -212,7 +212,7 @@ struct rte_sched_port {
        uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
        uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
-       uint32_t rate;
+       uint64_t rate;
        uint32_t mtu;
        uint32_t frame_overhead;
        int socket;
@@ -517,9 +517,11 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
        struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;
 
        RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
-               "       Token bucket: period = %u, credits per period = %u, size = %u\n"
-               "       Traffic classes: period = %u,\n"
-               "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
+               "       Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
+               "       Traffic classes: period = %"PRIu64",\n"
+               "       credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
+               ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
+               ", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
                "       Best-effort traffic class oversubscription: weight = %hhu\n"
                "       WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
                i,
@@ -553,7 +555,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
 }
 
 static inline uint64_t
-rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
+rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
 {
        uint64_t time = time_ms;
 
@@ -566,7 +568,7 @@ static void
 rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
        struct rte_sched_pipe_params *src,
        struct rte_sched_pipe_profile *dst,
-       uint32_t rate)
+       uint64_t rate)
 {
        uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint32_t lcd1, lcd2, lcd;
@@ -581,8 +583,8 @@ rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
 
-               rte_approx(tb_rate, d,
-                       &dst->tb_credits_per_period, &dst->tb_period);
+               rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
+                       &dst->tb_period);
        }
 
        dst->tb_size = src->tb_size;
@@ -637,7 +639,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
        subport->pipe_tc_be_rate_max = 0;
        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
-               uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
+               uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
 
                if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
                        subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
@@ -647,7 +649,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
 static int
 rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        uint32_t n_max_pipes_per_subport,
-       uint32_t rate)
+       uint64_t rate)
 {
        uint32_t i;
 
@@ -684,7 +686,7 @@ rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        }
 
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-               uint32_t tc_rate = params->tc_rate[i];
+               uint64_t tc_rate = params->tc_rate[i];
                uint16_t qsize = params->qsize[i];
 
                if ((qsize == 0 && tc_rate != 0) ||
@@ -910,10 +912,14 @@ rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
        struct rte_sched_subport *s = port->subports[i];
 
        RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
-               "       Token bucket: period = %u, credits per period = %u, size = %u\n"
-               "       Traffic classes: period = %u\n"
-               "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
-               "       Best effort traffic class oversubscription: wm min = %u, wm max = %u\n",
+               "       Token bucket: period = %"PRIu64", credits per period = %"PRIu64
+               ", size = %"PRIu64"\n"
+               "       Traffic classes: period = %"PRIu64"\n"
+               "       credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
+               ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
+               ", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
+               "       Best effort traffic class oversubscription: wm min = %"PRIu64
+               ", wm max = %"PRIu64"\n",
                i,
 
                /* Token bucket */
@@ -1023,7 +1029,7 @@ rte_sched_subport_config(struct rte_sched_port *port,
                double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
 
-               rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
+               rte_approx_64(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
        }
 
        s->tb_size = params->tb_size;
@@ -1970,13 +1976,13 @@ grinder_credits_update(struct rte_sched_port *port,
        /* Subport TB */
        n_periods = (port->time - subport->tb_time) / subport->tb_period;
        subport->tb_credits += n_periods * subport->tb_credits_per_period;
-       subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+       subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size);
        subport->tb_time += n_periods * subport->tb_period;
 
        /* Pipe TB */
        n_periods = (port->time - pipe->tb_time) / params->tb_period;
        pipe->tb_credits += n_periods * params->tb_credits_per_period;
-       pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+       pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
        pipe->tb_time += n_periods * params->tb_period;
 
        /* Subport TCs */
@@ -1998,13 +2004,13 @@ grinder_credits_update(struct rte_sched_port *port,
 
 #else
 
-static inline uint32_t
+static inline uint64_t
 grinder_tc_ov_credits_update(struct rte_sched_port *port,
        struct rte_sched_subport *subport)
 {
-       uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-       uint32_t tc_consumption = 0, tc_ov_consumption_max;
-       uint32_t tc_ov_wm = subport->tc_ov_wm;
+       uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t tc_consumption = 0, tc_ov_consumption_max;
+       uint64_t tc_ov_wm = subport->tc_ov_wm;
        uint32_t i;
 
        if (subport->tc_ov == 0)
@@ -2053,13 +2059,13 @@ grinder_credits_update(struct rte_sched_port *port,
        /* Subport TB */
        n_periods = (port->time - subport->tb_time) / subport->tb_period;
        subport->tb_credits += n_periods * subport->tb_credits_per_period;
-       subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+       subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size);
        subport->tb_time += n_periods * subport->tb_period;
 
        /* Pipe TB */
        n_periods = (port->time - pipe->tb_time) / params->tb_period;
        pipe->tb_credits += n_periods * params->tb_credits_per_period;
-       pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+       pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
        pipe->tb_time += n_periods * params->tb_period;
 
        /* Subport TCs */
@@ -2101,11 +2107,11 @@ grinder_credits_check(struct rte_sched_port *port,
        struct rte_sched_pipe *pipe = grinder->pipe;
        struct rte_mbuf *pkt = grinder->pkt;
        uint32_t tc_index = grinder->tc_index;
-       uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
-       uint32_t subport_tb_credits = subport->tb_credits;
-       uint32_t subport_tc_credits = subport->tc_credits[tc_index];
-       uint32_t pipe_tb_credits = pipe->tb_credits;
-       uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
+       uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
+       uint64_t subport_tb_credits = subport->tb_credits;
+       uint64_t subport_tc_credits = subport->tc_credits[tc_index];
+       uint64_t pipe_tb_credits = pipe->tb_credits;
+       uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
        int enough_credits;
 
        /* Check queue credits */
@@ -2136,21 +2142,22 @@ grinder_credits_check(struct rte_sched_port *port,
        struct rte_sched_pipe *pipe = grinder->pipe;
        struct rte_mbuf *pkt = grinder->pkt;
        uint32_t tc_index = grinder->tc_index;
-       uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
-       uint32_t subport_tb_credits = subport->tb_credits;
-       uint32_t subport_tc_credits = subport->tc_credits[tc_index];
-       uint32_t pipe_tb_credits = pipe->tb_credits;
-       uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
-       uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-       uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
-       uint32_t pipe_tc_ov_credits, i;
+       uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
+       uint64_t subport_tb_credits = subport->tb_credits;
+       uint64_t subport_tc_credits = subport->tc_credits[tc_index];
+       uint64_t pipe_tb_credits = pipe->tb_credits;
+       uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
+       uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+       uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
+       uint64_t pipe_tc_ov_credits;
+       uint32_t i;
        int enough_credits;
 
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
-               pipe_tc_ov_mask1[i] = UINT32_MAX;
+               pipe_tc_ov_mask1[i] = ~0LLU;
 
        pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
-       pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX;
+       pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
        pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
 
        /* Check pipe and subport credits */
index 8c191a9..b58282d 100644 (file)
@@ -14,12 +14,6 @@ extern "C" {
 
 #define __rte_aligned_16 __attribute__((__aligned__(16)))
 
-static inline uint32_t
-rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
-{
-       return (x < y)? x : y;
-}
-
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)