#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
#define RTE_SCHED_WRR_SHIFT 3
+#define RTE_SCHED_MAX_QUEUES_PER_TC RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
uint8_t tc_ov_weight;
- /* Pipe queues */
- uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+ /* Pipe best-effort traffic class queues */
+ uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};
struct rte_sched_pipe {
uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
/* Weighted Round Robin (WRR) */
- uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+ uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
/* TC oversubscription */
uint32_t tc_ov_credits;
uint32_t tc_index;
struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
- uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
uint16_t qsize;
uint32_t qmask;
uint32_t qpos;
struct rte_mbuf *pkt;
/* WRR */
- uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
- uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
- uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
+ uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
+ uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};
struct rte_sched_port {
" Token bucket: period = %u, credits per period = %u, size = %u\n"
" Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
" Traffic class 3 oversubscription: weight = %hhu\n"
- " WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
+ " WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
i,
/* Token bucket */
p->tc_ov_weight,
/* WRR */
- p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
- p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
- p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
- p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
+ p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}
static inline uint64_t
struct rte_sched_pipe_profile *dst,
uint32_t rate)
{
+ uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
+ uint32_t lcd1, lcd2, lcd;
uint32_t i;
/* Token Bucket */
dst->tc_ov_weight = src->tc_ov_weight;
#endif
- /* WRR */
- for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
- uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
- uint32_t lcd, lcd1, lcd2;
- uint32_t qindex;
-
- qindex = i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
-
- wrr_cost[0] = src->wrr_weights[qindex];
- wrr_cost[1] = src->wrr_weights[qindex + 1];
- wrr_cost[2] = src->wrr_weights[qindex + 2];
- wrr_cost[3] = src->wrr_weights[qindex + 3];
-
- lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
- lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
- lcd = rte_get_lcd(lcd1, lcd2);
-
- wrr_cost[0] = lcd / wrr_cost[0];
- wrr_cost[1] = lcd / wrr_cost[1];
- wrr_cost[2] = lcd / wrr_cost[2];
- wrr_cost[3] = lcd / wrr_cost[3];
-
- dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
- dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
- dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
- dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
- }
+ /* WRR queues */
+ wrr_cost[0] = src->wrr_weights[0];
+ wrr_cost[1] = src->wrr_weights[1];
+ wrr_cost[2] = src->wrr_weights[2];
+ wrr_cost[3] = src->wrr_weights[3];
+
+ lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
+ lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
+ lcd = rte_get_lcd(lcd1, lcd2);
+
+ wrr_cost[0] = lcd / wrr_cost[0];
+ wrr_cost[1] = lcd / wrr_cost[1];
+ wrr_cost[2] = lcd / wrr_cost[2];
+ wrr_cost[3] = lcd / wrr_cost[3];
+
+ dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
+ dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
+ dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
+ dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}
static void
struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
struct rte_mbuf *pkt = grinder->pkt;
uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
+ uint32_t be_tc_active;
if (!grinder_credits_check(port, pos))
return 0;
/* Send packet */
port->pkts_out[port->n_pkts_out++] = pkt;
queue->qr++;
- grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
+
+ be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
+ grinder->wrr_tokens[grinder->qpos] +=
+ (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
+
if (queue->qr == queue->qw) {
uint32_t qindex = grinder->qindex[grinder->qpos];
rte_bitmap_clear(port->bmp, qindex);
grinder->qmask &= ~(1 << grinder->qpos);
- grinder->wrr_mask[grinder->qpos] = 0;
+ if (be_tc_active)
+ grinder->wrr_mask[grinder->qpos] = 0;
rte_sched_port_set_queue_empty_timestamp(port, qindex);
}
struct rte_sched_grinder *grinder = port->grinder + pos;
struct rte_sched_pipe *pipe = grinder->pipe;
struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
- uint32_t tc_index = grinder->tc_index;
uint32_t qmask = grinder->qmask;
- uint32_t qindex;
-
- qindex = tc_index * 4;
- grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[0] =
+ ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[1] =
+ ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[2] =
+ ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[3] =
+ ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
- grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
- grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
- grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
- grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+ grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
+ grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
+ grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
+ grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
}
static inline void
{
struct rte_sched_grinder *grinder = port->grinder + pos;
struct rte_sched_pipe *pipe = grinder->pipe;
- uint32_t tc_index = grinder->tc_index;
- uint32_t qindex;
- qindex = tc_index * 4;
-
- pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3])
- >> RTE_SCHED_WRR_SHIFT;
+ pipe->wrr_tokens[0] =
+ (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
+ RTE_SCHED_WRR_SHIFT;
+ pipe->wrr_tokens[1] =
+ (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
+ RTE_SCHED_WRR_SHIFT;
+ pipe->wrr_tokens[2] =
+ (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
+ RTE_SCHED_WRR_SHIFT;
+ pipe->wrr_tokens[3] =
+ (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
+ RTE_SCHED_WRR_SHIFT;
}
static inline void
grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
{
struct rte_sched_grinder *grinder = port->grinder + pos;
- uint16_t qsize, qr[4];
+ uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
qsize = grinder->qsize;
+ grinder->qpos = 0;
+
+ if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
+ qr[0] = grinder->queue[0]->qr & (qsize - 1);
+
+ rte_prefetch0(grinder->qbase[0] + qr[0]);
+ return;
+ }
+
qr[0] = grinder->queue[0]->qr & (qsize - 1);
qr[1] = grinder->queue[1]->qr & (qsize - 1);
qr[2] = grinder->queue[2]->qr & (qsize - 1);
case e_GRINDER_READ_MBUF:
{
- uint32_t result = 0;
+ uint32_t wrr_active, result = 0;
result = grinder_schedule(port, pos);
+ wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
+
/* Look for next packet within the same TC */
if (result && grinder->qmask) {
- grinder_wrr(port, pos);
+ if (wrr_active)
+ grinder_wrr(port, pos);
+
grinder_prefetch_mbuf(port, pos);
return 1;
}
- grinder_wrr_store(port, pos);
+
+ if (wrr_active)
+ grinder_wrr_store(port, pos);
/* Look for another active TC within same pipe */
if (grinder_next_tc(port, pos)) {