X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_sched%2Frte_sched.c;h=0fa074166469705f987167b415e45ac814623371;hb=a8cdfc69c84b7740646368a46bbd3d4d6ddf97b0;hp=fdcbb214ea880efbb7e85a3504e43348186bfe9e;hpb=21dca4e3f61fbde4e2b1483afc38aaf708007a28;p=dpdk.git diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index fdcbb214ea..0fa0741664 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -49,13 +49,13 @@ struct rte_sched_pipe_profile { /* Token bucket (TB) */ - uint32_t tb_period; - uint32_t tb_credits_per_period; - uint32_t tb_size; + uint64_t tb_period; + uint64_t tb_credits_per_period; + uint64_t tb_size; /* Pipe traffic classes */ - uint32_t tc_period; - uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_period; + uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; uint8_t tc_ov_weight; /* Pipe best-effort traffic class queues */ @@ -65,20 +65,20 @@ struct rte_sched_pipe_profile { struct rte_sched_pipe { /* Token bucket (TB) */ uint64_t tb_time; /* time of last update */ - uint32_t tb_credits; + uint64_t tb_credits; /* Pipe profile and flags */ uint32_t profile; /* Traffic classes (TCs) */ uint64_t tc_time; /* time of next update */ - uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* Weighted Round Robin (WRR) */ uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* TC oversubscription */ - uint32_t tc_ov_credits; + uint64_t tc_ov_credits; uint8_t tc_ov_period_id; } __rte_cache_aligned; @@ -141,28 +141,28 @@ struct rte_sched_grinder { struct rte_sched_subport { /* Token bucket (TB) */ uint64_t tb_time; /* time of last update */ - uint32_t tb_period; - uint32_t tb_credits_per_period; - uint32_t tb_size; - uint32_t tb_credits; + uint64_t tb_period; + uint64_t tb_credits_per_period; + uint64_t tb_size; + uint64_t tb_credits; /* Traffic classes (TCs) */ uint64_t tc_time; /* time of next update */ - uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_period; + uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_period; /* TC oversubscription */ - uint32_t tc_ov_wm; - uint32_t tc_ov_wm_min; - uint32_t tc_ov_wm_max; + uint64_t tc_ov_wm; + uint64_t tc_ov_wm_min; + uint64_t tc_ov_wm_max; uint8_t tc_ov_period_id; uint8_t tc_ov; uint32_t tc_ov_n; double tc_ov_rate; /* Statistics */ - struct rte_sched_subport_stats stats; + struct rte_sched_subport_stats stats __rte_cache_aligned; /* Subport pipes */ uint32_t n_pipes_per_subport_enabled; @@ -170,7 +170,7 @@ struct rte_sched_subport { uint32_t n_max_pipe_profiles; /* Pipe best-effort TC rate */ - uint32_t pipe_tc_be_rate_max; + uint64_t pipe_tc_be_rate_max; /* Pipe queues size */ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; @@ -212,65 +212,27 @@ struct rte_sched_port { uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE]; uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE]; - uint32_t rate; + uint64_t rate; uint32_t mtu; uint32_t frame_overhead; int socket; - uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t n_pipe_profiles; - uint32_t n_max_pipe_profiles; - uint32_t pipe_tc_be_rate_max; -#ifdef RTE_SCHED_RED - struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; -#endif /* Timing */ uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ uint64_t time; /* Current NIC TX time measured in bytes */ struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ - - /* Scheduling loop detection */ - uint32_t pipe_loop; - uint32_t pipe_exhaustion; - - /* Bitmap */ - struct rte_bitmap *bmp; - uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16; + uint64_t cycles_per_byte; /* Grinders */ - struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS]; - uint32_t busy_grinders; struct rte_mbuf **pkts_out; uint32_t n_pkts_out; - - /* Queue base calculation */ - uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE]; - uint32_t qsize_sum; + uint32_t subport_id; /* Large data structures */ - struct rte_sched_subport *subports[0]; - struct rte_sched_subport *subport; - struct rte_sched_pipe *pipe; - struct rte_sched_queue *queue; - struct rte_sched_queue_extra *queue_extra; - struct rte_sched_pipe_profile *pipe_profiles; - uint8_t *bmp_array; - struct rte_mbuf **queue_array; - uint8_t memory[0] __rte_cache_aligned; + struct rte_sched_subport *subports[0] __rte_cache_aligned; } __rte_cache_aligned; -enum rte_sched_port_array { - e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0, - e_RTE_SCHED_PORT_ARRAY_PIPE, - e_RTE_SCHED_PORT_ARRAY_QUEUE, - e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA, - e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES, - e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY, - e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY, - e_RTE_SCHED_PORT_ARRAY_TOTAL, -}; - enum rte_sched_subport_array { e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0, e_RTE_SCHED_SUBPORT_ARRAY_QUEUE, @@ -309,25 +271,12 @@ struct rte_sched_subport *subport, uint32_t qindex) static inline uint32_t rte_sched_port_queues_per_port(struct rte_sched_port *port) { - return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port; -} - -static inline struct rte_mbuf ** -rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex) -{ - uint32_t pindex = qindex >> 4; - uint32_t qpos = qindex & 0xF; + uint32_t n_queues = 0, i; - return (port->queue_array + pindex * - port->qsize_sum + port->qsize_add[qpos]); -} - -static inline uint16_t -rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex) -{ - uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)]; + for (i = 0; i < port->n_subports_per_port; i++) + n_queues += rte_sched_subport_pipe_queues(port->subports[i]); - return port->qsize[tc]; + return n_queues; } static inline uint16_t @@ -356,7 +305,7 @@ rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex) static int pipe_profile_check(struct rte_sched_pipe_params *params, - uint32_t rate, uint16_t *qsize) + uint64_t rate, uint16_t *qsize) { uint32_t i; @@ -569,9 +518,11 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i) struct rte_sched_pipe_profile *p = subport->pipe_profiles + i; RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n" - " Token bucket: period = %u, credits per period = %u, size = %u\n" - " Traffic classes: period = %u,\n" - " credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n" + " Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n" + " Traffic classes: period = %"PRIu64",\n" + " credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64"]\n" " Best-effort traffic class oversubscription: weight = %hhu\n" " WRR cost: [%hhu, %hhu, %hhu, %hhu]\n", i, @@ -605,7 +556,7 @@ rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i) } static inline uint64_t -rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) +rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate) { uint64_t time = time_ms; @@ -618,7 +569,7 @@ static void rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, struct rte_sched_pipe_params *src, struct rte_sched_pipe_profile *dst, - uint32_t rate) + uint64_t rate) { uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; uint32_t lcd1, lcd2, lcd; @@ -633,8 +584,8 @@ rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, / (double) rate; double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - rte_approx(tb_rate, d, - &dst->tb_credits_per_period, &dst->tb_period); + rte_approx_64(tb_rate, d, &dst->tb_credits_per_period, + &dst->tb_period); } dst->tb_size = src->tb_size; @@ -674,7 +625,7 @@ rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, static void rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, - struct rte_sched_subport_params *params, uint32_t rate) + struct rte_sched_subport_params *params, uint64_t rate) { uint32_t i; @@ -689,7 +640,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, subport->pipe_tc_be_rate_max = 0; for (i = 0; i < subport->n_pipe_profiles; i++) { struct rte_sched_pipe_params *src = params->pipe_profiles + i; - uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; + uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate) subport->pipe_tc_be_rate_max = pipe_tc_be_rate; @@ -699,7 +650,7 @@ rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, static int rte_sched_subport_check_params(struct rte_sched_subport_params *params, uint32_t n_max_pipes_per_subport, - uint32_t rate) + uint64_t rate) { uint32_t i; @@ -736,7 +687,7 @@ rte_sched_subport_check_params(struct rte_sched_subport_params *params, } for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { - uint32_t tc_rate = params->tc_rate[i]; + uint64_t tc_rate = params->tc_rate[i]; uint16_t qsize = params->qsize[i]; if ((qsize == 0 && tc_rate != 0) || @@ -902,10 +853,12 @@ rte_sched_port_config(struct rte_sched_port_params *params) cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) / params->rate; port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); + port->cycles_per_byte = cycles_per_byte; /* Grinders */ port->pkts_out = NULL; port->n_pkts_out = 0; + port->subport_id = 0; return port; } @@ -937,7 +890,7 @@ rte_sched_subport_free(struct rte_sched_port *port, } } - rte_bitmap_free(subport->bmp); + rte_free(subport); } void @@ -961,10 +914,14 @@ rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) struct rte_sched_subport *s = port->subports[i]; RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n" - " Token bucket: period = %u, credits per period = %u, size = %u\n" - " Traffic classes: period = %u\n" - " credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n" - " Best effort traffic class oversubscription: wm min = %u, wm max = %u\n", + " Token bucket: period = %"PRIu64", credits per period = %"PRIu64 + ", size = %"PRIu64"\n" + " Traffic classes: period = %"PRIu64"\n" + " credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 + ", %"PRIu64", %"PRIu64", %"PRIu64"]\n" + " Best effort traffic class oversubscription: wm min = %"PRIu64 + ", wm max = %"PRIu64"\n", i, /* Token bucket */ @@ -1074,7 +1031,7 @@ rte_sched_subport_config(struct rte_sched_port *port, double tb_rate = ((double) params->tb_rate) / ((double) port->rate); double d = RTE_SCHED_TB_RATE_CONFIG_ERR; - rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); + rte_approx_64(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); } s->tb_size = params->tb_size; @@ -1480,8 +1437,10 @@ rte_sched_queue_read_stats(struct rte_sched_port *port, struct rte_sched_queue_stats *stats, uint16_t *qlen) { + struct rte_sched_subport *s; struct rte_sched_queue *q; struct rte_sched_queue_extra *qe; + uint32_t subport_id, subport_qmask, subport_qindex; /* Check user parameters */ if (port == NULL) { @@ -1507,8 +1466,13 @@ rte_sched_queue_read_stats(struct rte_sched_port *port, "%s: Incorrect value for parameter qlen\n", __func__); return -EINVAL; } - q = port->queue + queue_id; - qe = port->queue_extra + queue_id; + subport_qmask = port->n_pipes_per_subport_log2 + 4; + subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1); + + s = port->subports[subport_id]; + subport_qindex = ((1 << subport_qmask) - 1) & queue_id; + q = s->queue + subport_qindex; + qe = s->queue_extra + subport_qindex; /* Copy queue stats and clear */ memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats)); @@ -1523,9 +1487,10 @@ rte_sched_queue_read_stats(struct rte_sched_port *port, #ifdef RTE_SCHED_DEBUG static inline int -rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex) +rte_sched_port_queue_is_empty(struct rte_sched_subport *subport, + uint32_t qindex) { - struct rte_sched_queue *queue = port->queue + qindex; + struct rte_sched_queue *queue = subport->queue + qindex; return queue->qr == queue->qw; } @@ -1640,9 +1605,10 @@ rte_sched_port_red_drop(struct rte_sched_port *port, } static inline void -rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex) +rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t qindex) { - struct rte_sched_queue_extra *qe = port->queue_extra + qindex; + struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; struct rte_red *red = &qe->red; rte_red_mark_queue_empty(red, port->time); @@ -1659,14 +1625,14 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus return 0; } -#define rte_sched_port_set_queue_empty_timestamp(port, qindex) +#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex) #endif /* RTE_SCHED_RED */ #ifdef RTE_SCHED_DEBUG static inline void -debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, +debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos, uint64_t bmp_slab) { uint64_t mask; @@ -1678,7 +1644,7 @@ debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, panic = 0; for (i = 0, mask = 1; i < 64; i++, mask <<= 1) { if (mask & bmp_slab) { - if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) { + if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) { printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i); panic = 1; } @@ -2000,10 +1966,10 @@ rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, #ifndef RTE_SCHED_SUBPORT_TC_OV static inline void -grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +grinder_credits_update(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; - struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *params = grinder->pipe_params; uint64_t n_periods; @@ -2012,13 +1978,13 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos) /* Subport TB */ n_periods = (port->time - subport->tb_time) / subport->tb_period; subport->tb_credits += n_periods * subport->tb_credits_per_period; - subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size); subport->tb_time += n_periods * subport->tb_period; /* Pipe TB */ n_periods = (port->time - pipe->tb_time) / params->tb_period; pipe->tb_credits += n_periods * params->tb_credits_per_period; - pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size); pipe->tb_time += n_periods * params->tb_period; /* Subport TCs */ @@ -2040,14 +2006,13 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos) #else -static inline uint32_t -grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos) +static inline uint64_t +grinder_tc_ov_credits_update(struct rte_sched_port *port, + struct rte_sched_subport *subport) { - struct rte_sched_grinder *grinder = port->grinder + pos; - struct rte_sched_subport *subport = grinder->subport; - uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t tc_consumption = 0, tc_ov_consumption_max; - uint32_t tc_ov_wm = subport->tc_ov_wm; + uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t tc_consumption = 0, tc_ov_consumption_max; + uint64_t tc_ov_wm = subport->tc_ov_wm; uint32_t i; if (subport->tc_ov == 0) @@ -2084,10 +2049,10 @@ grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos) } static inline void -grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +grinder_credits_update(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; - struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *params = grinder->pipe_params; uint64_t n_periods; @@ -2096,18 +2061,18 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos) /* Subport TB */ n_periods = (port->time - subport->tb_time) / subport->tb_period; subport->tb_credits += n_periods * subport->tb_credits_per_period; - subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_credits = RTE_MIN(subport->tb_credits, subport->tb_size); subport->tb_time += n_periods * subport->tb_period; /* Pipe TB */ n_periods = (port->time - pipe->tb_time) / params->tb_period; pipe->tb_credits += n_periods * params->tb_credits_per_period; - pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size); pipe->tb_time += n_periods * params->tb_period; /* Subport TCs */ if (unlikely(port->time >= subport->tc_time)) { - subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos); + subport->tc_ov_wm = grinder_tc_ov_credits_update(port, subport); for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) subport->tc_credits[i] = subport->tc_credits_per_period[i]; @@ -2137,18 +2102,18 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos) #ifndef RTE_SCHED_SUBPORT_TC_OV static inline int -grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +grinder_credits_check(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; - struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_mbuf *pkt = grinder->pkt; uint32_t tc_index = grinder->tc_index; - uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; - uint32_t subport_tb_credits = subport->tb_credits; - uint32_t subport_tc_credits = subport->tc_credits[tc_index]; - uint32_t pipe_tb_credits = pipe->tb_credits; - uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint64_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint64_t subport_tb_credits = subport->tb_credits; + uint64_t subport_tc_credits = subport->tc_credits[tc_index]; + uint64_t pipe_tb_credits = pipe->tb_credits; + uint64_t pipe_tc_credits = pipe->tc_credits[tc_index]; int enough_credits; /* Check queue credits */ @@ -2172,28 +2137,29 @@ grinder_credits_check(struct rte_sched_port *port, uint32_t pos) #else static inline int -grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +grinder_credits_check(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; - struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_mbuf *pkt = grinder->pkt; uint32_t tc_index = grinder->tc_index; - uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; - uint32_t subport_tb_credits = subport->tb_credits; - uint32_t subport_tc_credits = subport->tc_credits[tc_index]; - uint32_t pipe_tb_credits = pipe->tb_credits; - uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; - uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0}; - uint32_t pipe_tc_ov_credits, i; + uint64_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint64_t subport_tb_credits = subport->tb_credits; + uint64_t subport_tc_credits = subport->tc_credits[tc_index]; + uint64_t pipe_tb_credits = pipe->tb_credits; + uint64_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0}; + uint64_t pipe_tc_ov_credits; + uint32_t i; int enough_credits; for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - pipe_tc_ov_mask1[i] = UINT32_MAX; + pipe_tc_ov_mask1[i] = ~0LLU; pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits; - pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX; + pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU; pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index]; /* Check pipe and subport credits */ @@ -2220,15 +2186,16 @@ grinder_credits_check(struct rte_sched_port *port, uint32_t pos) static inline int -grinder_schedule(struct rte_sched_port *port, uint32_t pos) +grinder_schedule(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; struct rte_mbuf *pkt = grinder->pkt; uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; uint32_t be_tc_active; - if (!grinder_credits_check(port, pos)) + if (!grinder_credits_check(port, subport, pos)) return 0; /* Advance port time */ @@ -2245,15 +2212,15 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos) if (queue->qr == queue->qw) { uint32_t qindex = grinder->qindex[grinder->qpos]; - rte_bitmap_clear(port->bmp, qindex); + rte_bitmap_clear(subport->bmp, qindex); grinder->qmask &= ~(1 << grinder->qpos); if (be_tc_active) grinder->wrr_mask[grinder->qpos] = 0; - rte_sched_port_set_queue_empty_timestamp(port, qindex); + rte_sched_port_set_queue_empty_timestamp(port, subport, qindex); } /* Reset pipe loop detection */ - port->pipe_loop = RTE_SCHED_PIPE_INVALID; + subport->pipe_loop = RTE_SCHED_PIPE_INVALID; grinder->productive = 1; return 1; @@ -2262,13 +2229,13 @@ grinder_schedule(struct rte_sched_port *port, uint32_t pos) #ifdef SCHED_VECTOR_SSE4 static inline int -grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe) { __m128i index = _mm_set1_epi32(base_pipe); - __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos); + __m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos); __m128i res = _mm_cmpeq_epi32(pipes, index); - pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4)); + pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4)); pipes = _mm_cmpeq_epi32(pipes, index); res = _mm_or_si128(res, pipes); @@ -2281,10 +2248,10 @@ grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) #elif defined(SCHED_VECTOR_NEON) static inline int -grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe) { uint32x4_t index, pipes; - uint32_t *pos = (uint32_t *)port->grinder_base_bmp_pos; + uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos; index = vmovq_n_u32(base_pipe); pipes = vld1q_u32(pos); @@ -2301,12 +2268,12 @@ grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) #else static inline int -grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe) { uint32_t i; for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) { - if (port->grinder_base_bmp_pos[i] == base_pipe) + if (subport->grinder_base_bmp_pos[i] == base_pipe) return 1; } @@ -2316,9 +2283,10 @@ grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) #endif /* RTE_SCHED_OPTIMIZATIONS */ static inline void -grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab) +grinder_pcache_populate(struct rte_sched_subport *subport, + uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint16_t w[4]; grinder->pcache_w = 0; @@ -2347,9 +2315,10 @@ grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_ } static inline void -grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask) +grinder_tccache_populate(struct rte_sched_subport *subport, + uint32_t pos, uint32_t qindex, uint16_t qmask) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint8_t b, i; grinder->tccache_w = 0; @@ -2370,9 +2339,10 @@ grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qin } static inline int -grinder_next_tc(struct rte_sched_port *port, uint32_t pos) +grinder_next_tc(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_mbuf **qbase; uint32_t qindex; uint16_t qsize; @@ -2381,15 +2351,15 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos) return 0; qindex = grinder->tccache_qindex[grinder->tccache_r]; - qbase = rte_sched_port_qbase(port, qindex); - qsize = rte_sched_port_qsize(port, qindex); + qbase = rte_sched_subport_pipe_qbase(subport, qindex); + qsize = rte_sched_subport_pipe_qsize(port, subport, qindex); grinder->tc_index = rte_sched_port_pipe_tc(port, qindex); grinder->qmask = grinder->tccache_qmask[grinder->tccache_r]; grinder->qsize = qsize; if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) { - grinder->queue[0] = port->queue + qindex; + grinder->queue[0] = subport->queue + qindex; grinder->qbase[0] = qbase; grinder->qindex[0] = qindex; grinder->tccache_r++; @@ -2397,10 +2367,10 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos) return 1; } - grinder->queue[0] = port->queue + qindex; - grinder->queue[1] = port->queue + qindex + 1; - grinder->queue[2] = port->queue + qindex + 2; - grinder->queue[3] = port->queue + qindex + 3; + grinder->queue[0] = subport->queue + qindex; + grinder->queue[1] = subport->queue + qindex + 1; + grinder->queue[2] = subport->queue + qindex + 2; + grinder->queue[3] = subport->queue + qindex + 3; grinder->qbase[0] = qbase; grinder->qbase[1] = qbase + qsize; @@ -2417,9 +2387,10 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos) } static inline int -grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) +grinder_next_pipe(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint32_t pipe_qindex; uint16_t pipe_qmask; @@ -2432,22 +2403,22 @@ grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) uint32_t bmp_pos = 0; /* Get another non-empty pipe group */ - if (unlikely(rte_bitmap_scan(port->bmp, &bmp_pos, &bmp_slab) <= 0)) + if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0)) return 0; #ifdef RTE_SCHED_DEBUG - debug_check_queue_slab(port, bmp_pos, bmp_slab); + debug_check_queue_slab(subport, bmp_pos, bmp_slab); #endif /* Return if pipe group already in one of the other grinders */ - port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID; - if (unlikely(grinder_pipe_exists(port, bmp_pos))) + subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID; + if (unlikely(grinder_pipe_exists(subport, bmp_pos))) return 0; - port->grinder_base_bmp_pos[pos] = bmp_pos; + subport->grinder_base_bmp_pos[pos] = bmp_pos; /* Install new pipe group into grinder's pipe cache */ - grinder_pcache_populate(port, pos, bmp_pos, bmp_slab); + grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab); pipe_qmask = grinder->pcache_qmask[0]; pipe_qindex = grinder->pcache_qindex[0]; @@ -2456,18 +2427,18 @@ grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) /* Install new pipe in the grinder */ grinder->pindex = pipe_qindex >> 4; - grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport); - grinder->pipe = port->pipe + grinder->pindex; + grinder->subport = subport; + grinder->pipe = subport->pipe + grinder->pindex; grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */ grinder->productive = 0; - grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask); - grinder_next_tc(port, pos); + grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask); + grinder_next_tc(port, subport, pos); /* Check for pipe exhaustion */ - if (grinder->pindex == port->pipe_loop) { - port->pipe_exhaustion = 1; - port->pipe_loop = RTE_SCHED_PIPE_INVALID; + if (grinder->pindex == subport->pipe_loop) { + subport->pipe_exhaustion = 1; + subport->pipe_loop = RTE_SCHED_PIPE_INVALID; } return 1; @@ -2475,9 +2446,9 @@ grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) static inline void -grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; uint32_t qmask = grinder->qmask; @@ -2503,9 +2474,9 @@ grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) } static inline void -grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; pipe->wrr_tokens[0] = @@ -2523,9 +2494,9 @@ grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) } static inline void -grinder_wrr(struct rte_sched_port *port, uint32_t pos) +grinder_wrr(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint16_t wrr_tokens_min; grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; @@ -2543,21 +2514,21 @@ grinder_wrr(struct rte_sched_port *port, uint32_t pos) } -#define grinder_evict(port, pos) +#define grinder_evict(subport, pos) static inline void -grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos) +grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; rte_prefetch0(grinder->pipe); rte_prefetch0(grinder->queue[0]); } static inline void -grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) +grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC]; qsize = grinder->qsize; @@ -2578,17 +2549,17 @@ grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) rte_prefetch0(grinder->qbase[0] + qr[0]); rte_prefetch0(grinder->qbase[1] + qr[1]); - grinder_wrr_load(port, pos); - grinder_wrr(port, pos); + grinder_wrr_load(subport, pos); + grinder_wrr(subport, pos); rte_prefetch0(grinder->qbase[2] + qr[2]); rte_prefetch0(grinder->qbase[3] + qr[3]); } static inline void -grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos) +grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; uint32_t qpos = grinder->qpos; struct rte_mbuf **qbase = grinder->qbase[qpos]; uint16_t qsize = grinder->qsize; @@ -2605,16 +2576,17 @@ grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos) } static inline uint32_t -grinder_handle(struct rte_sched_port *port, uint32_t pos) +grinder_handle(struct rte_sched_port *port, + struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; switch (grinder->state) { case e_GRINDER_PREFETCH_PIPE: { - if (grinder_next_pipe(port, pos)) { - grinder_prefetch_pipe(port, pos); - port->busy_grinders++; + if (grinder_next_pipe(port, subport, pos)) { + grinder_prefetch_pipe(subport, pos); + subport->busy_grinders++; grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; return 0; @@ -2627,9 +2599,9 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) { struct rte_sched_pipe *pipe = grinder->pipe; - grinder->pipe_params = port->pipe_profiles + pipe->profile; - grinder_prefetch_tc_queue_arrays(port, pos); - grinder_credits_update(port, pos); + grinder->pipe_params = subport->pipe_profiles + pipe->profile; + grinder_prefetch_tc_queue_arrays(subport, pos); + grinder_credits_update(port, subport, pos); grinder->state = e_GRINDER_PREFETCH_MBUF; return 0; @@ -2637,7 +2609,7 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) case e_GRINDER_PREFETCH_MBUF: { - grinder_prefetch_mbuf(port, pos); + grinder_prefetch_mbuf(subport, pos); grinder->state = e_GRINDER_READ_MBUF; return 0; @@ -2647,47 +2619,47 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) { uint32_t wrr_active, result = 0; - result = grinder_schedule(port, pos); + result = grinder_schedule(port, subport, pos); wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE); /* Look for next packet within the same TC */ if (result && grinder->qmask) { if (wrr_active) - grinder_wrr(port, pos); + grinder_wrr(subport, pos); - grinder_prefetch_mbuf(port, pos); + grinder_prefetch_mbuf(subport, pos); return 1; } if (wrr_active) - grinder_wrr_store(port, pos); + grinder_wrr_store(subport, pos); /* Look for another active TC within same pipe */ - if (grinder_next_tc(port, pos)) { - grinder_prefetch_tc_queue_arrays(port, pos); + if (grinder_next_tc(port, subport, pos)) { + grinder_prefetch_tc_queue_arrays(subport, pos); grinder->state = e_GRINDER_PREFETCH_MBUF; return result; } if (grinder->productive == 0 && - port->pipe_loop == RTE_SCHED_PIPE_INVALID) - port->pipe_loop = grinder->pindex; + subport->pipe_loop == RTE_SCHED_PIPE_INVALID) + subport->pipe_loop = grinder->pindex; - grinder_evict(port, pos); + grinder_evict(subport, pos); /* Look for another active pipe */ - if (grinder_next_pipe(port, pos)) { - grinder_prefetch_pipe(port, pos); + if (grinder_next_pipe(port, subport, pos)) { + grinder_prefetch_pipe(subport, pos); grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; return result; } /* No active pipe found */ - port->busy_grinders--; + subport->busy_grinders--; grinder->state = e_GRINDER_PREFETCH_PIPE; return result; @@ -2703,34 +2675,41 @@ static inline void rte_sched_port_time_resync(struct rte_sched_port *port) { uint64_t cycles = rte_get_tsc_cycles(); - uint64_t cycles_diff = cycles - port->time_cpu_cycles; + uint64_t cycles_diff; uint64_t bytes_diff; + uint32_t i; + + if (cycles < port->time_cpu_cycles) + port->time_cpu_cycles = 0; + cycles_diff = cycles - port->time_cpu_cycles; /* Compute elapsed time in bytes */ bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT, port->inv_cycles_per_byte); /* Advance port time */ - port->time_cpu_cycles = cycles; + port->time_cpu_cycles += + (bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT; port->time_cpu_bytes += bytes_diff; if (port->time < port->time_cpu_bytes) port->time = port->time_cpu_bytes; /* Reset pipe loop detection */ - port->pipe_loop = RTE_SCHED_PIPE_INVALID; + for (i = 0; i < port->n_subports_per_port; i++) + port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID; } static inline int -rte_sched_port_exceptions(struct rte_sched_port *port, int second_pass) +rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass) { int exceptions; /* Check if any exception flag is set */ - exceptions = (second_pass && port->busy_grinders == 0) || - (port->pipe_exhaustion == 1); + exceptions = (second_pass && subport->busy_grinders == 0) || + (subport->pipe_exhaustion == 1); /* Clear exception flags */ - port->pipe_exhaustion = 0; + subport->pipe_exhaustion = 0; return exceptions; } @@ -2738,7 +2717,9 @@ rte_sched_port_exceptions(struct rte_sched_port *port, int second_pass) int rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts) { - uint32_t i, count; + struct rte_sched_subport *subport; + uint32_t subport_id = port->subport_id; + uint32_t i, n_subports = 0, count; port->pkts_out = pkts; port->n_pkts_out = 0; @@ -2747,9 +2728,32 @@ rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint /* Take each queue in the grinder one step further */ for (i = 0, count = 0; ; i++) { - count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1)); - if ((count == n_pkts) || - rte_sched_port_exceptions(port, i >= RTE_SCHED_PORT_N_GRINDERS)) { + subport = port->subports[subport_id]; + + count += grinder_handle(port, subport, + i & (RTE_SCHED_PORT_N_GRINDERS - 1)); + + if (count == n_pkts) { + subport_id++; + + if (subport_id == port->n_subports_per_port) + subport_id = 0; + + port->subport_id = subport_id; + break; + } + + if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) { + i = 0; + subport_id++; + n_subports++; + } + + if (subport_id == port->n_subports_per_port) + subport_id = 0; + + if (n_subports == port->n_subports_per_port) { + port->subport_id = subport_id; break; } }