sched: remove redundant code
lib/librte_sched/rte_sched.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_bitmap.h>
#include <rte_reciprocal.h>

#include "rte_sched.h"
#include "rte_sched_common.h"
#include "rte_approx.h"

#ifdef __INTEL_COMPILER
#pragma warning(disable:2259) /* conversion may lose significant bits */
#endif

#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>

#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
#elif defined(RTE_MACHINE_CPUFLAG_NEON)
#define SCHED_VECTOR_NEON
#endif

#endif

#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
#define RTE_SCHED_WRR_SHIFT                   3
#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

/* Scaling for cycles_per_byte calculation
 * Chosen so that minimum rate is 480 bit/sec
 */
#define RTE_SCHED_TIME_SHIFT                  8
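
/* Illustrative math (not part of the original source): with
 * cycles_per_byte = (tsc_hz << RTE_SCHED_TIME_SHIFT) / rate stored in a
 * 32-bit variable, the smallest usable rate is roughly
 * tsc_hz / 2^(32 - RTE_SCHED_TIME_SHIFT) bytes/sec. Assuming a ~1 GHz
 * TSC, that is about 60 bytes/sec, i.e. the ~480 bit/sec minimum quoted
 * above.
 */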

struct rte_sched_pipe_profile {
        /* Token bucket (TB) */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;

        /* Pipe traffic classes */
        uint32_t tc_period;
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t tc_ov_weight;

        /* Pipe best-effort traffic class queues */
        uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_pipe {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_credits;

        /* Pipe profile and flags */
        uint32_t profile;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /* Weighted Round Robin (WRR) */
        uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

        /* TC oversubscription */
        uint32_t tc_ov_credits;
        uint8_t tc_ov_period_id;
} __rte_cache_aligned;

struct rte_sched_queue {
        uint16_t qw;
        uint16_t qr;
};

struct rte_sched_queue_extra {
        struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_RED
        struct rte_red red;
#endif
};

enum grinder_state {
        e_GRINDER_PREFETCH_PIPE = 0,
        e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
        e_GRINDER_PREFETCH_MBUF,
        e_GRINDER_READ_MBUF
};

struct rte_sched_grinder {
        /* Pipe cache */
        uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_w;
        uint32_t pcache_r;

        /* Current pipe */
        enum grinder_state state;
        uint32_t productive;
        uint32_t pindex;
        struct rte_sched_subport *subport;
        struct rte_sched_pipe *pipe;
        struct rte_sched_pipe_profile *pipe_params;

        /* TC cache */
        uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_w;
        uint32_t tccache_r;

        /* Current TC */
        uint32_t tc_index;
        struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
        struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint16_t qsize;
        uint32_t qmask;
        uint32_t qpos;
        struct rte_mbuf *pkt;

        /* WRR */
        uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;
        uint32_t tb_credits;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_period;

        /* TC oversubscription */
        uint32_t tc_ov_wm;
        uint32_t tc_ov_wm_min;
        uint32_t tc_ov_wm_max;
        uint8_t tc_ov_period_id;
        uint8_t tc_ov;
        uint32_t tc_ov_n;
        double tc_ov_rate;

        /* Statistics */
        struct rte_sched_subport_stats stats;

        /* Subport pipes */
        uint32_t n_pipes_per_subport_enabled;
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;

        /* Pipe best-effort TC rate */
        uint32_t pipe_tc_be_rate_max;

        /* Pipe queues size */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

struct rte_sched_port {
        /* User parameters */
        uint32_t n_subports_per_port;
        uint32_t n_pipes_per_subport;
        uint32_t n_pipes_per_subport_log2;
        uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
        uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t rate;
        uint32_t mtu;
        uint32_t frame_overhead;
        int socket;

        /* Timing */
        uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
        uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
        uint64_t time;                /* Current NIC TX time measured in bytes */
        struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */

        /* Grinders */
        struct rte_mbuf **pkts_out;
        uint32_t n_pkts_out;
        uint32_t subport_id;

        /* Large data structures */
        struct rte_sched_subport *subports[0] __rte_cache_aligned;
} __rte_cache_aligned;

enum rte_sched_subport_array {
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};

static inline uint32_t
rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
{
        return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
}

static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t pindex = qindex >> 4;
        uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

        return (subport->queue_array + pindex *
                subport->qsize_sum + subport->qsize_add[qpos]);
}

static inline uint16_t
rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
        struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return subport->qsize[tc];
}

static inline uint32_t
rte_sched_port_queues_per_port(struct rte_sched_port *port)
{
        uint32_t n_queues = 0, i;

        for (i = 0; i < port->n_subports_per_port; i++)
                n_queues += rte_sched_subport_pipe_queues(port->subports[i]);

        return n_queues;
}

static inline uint16_t
rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
{
        uint16_t pipe_queue = port->pipe_queue[traffic_class];

        return pipe_queue;
}

static inline uint8_t
rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return pipe_tc;
}

static inline uint8_t
rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return tc_queue;
}

static int
pipe_profile_check(struct rte_sched_pipe_params *params,
        uint32_t rate, uint16_t *qsize)
{
        uint32_t i;

        /* Pipe parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* TB rate: non-zero, not greater than port rate */
        if (params->tb_rate == 0 ||
                params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        /* TB size: non-zero */
        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* TC rate: non-zero if qsize non-zero, less than pipe rate */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
                        (qsize[i] != 0 && (params->tc_rate[i] == 0 ||
                        params->tc_rate[i] > params->tb_rate))) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize or tc_rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for be traffic class rate\n", __func__);
                return -EINVAL;
        }

        /* TC period: non-zero */
        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* Best effort tc oversubscription weight: non-zero */
        if (params->tc_ov_weight == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc ov weight\n", __func__);
                return -EINVAL;
        }

        /* Queue WRR weights: non-zero */
        for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
                if (params->wrr_weights[i] == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for wrr weight\n", __func__);
                        return -EINVAL;
                }
        }

        return 0;
}

static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* socket */
        if (params->socket < 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for socket id\n", __func__);
                return -EINVAL;
        }

        /* rate */
        if (params->rate == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for rate\n", __func__);
                return -EINVAL;
        }

        /* mtu */
        if (params->mtu == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for mtu\n", __func__);
                return -EINVAL;
        }

        /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
        if (params->n_subports_per_port == 0 ||
            params->n_subports_per_port > 1u << 16 ||
            !rte_is_power_of_2(params->n_subports_per_port)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for number of subports\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport == 0 ||
            !rte_is_power_of_2(params->n_pipes_per_subport)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for maximum pipes number\n", __func__);
                return -EINVAL;
        }

        return 0;
}

static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
        enum rte_sched_subport_array array)
{
        uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
        uint32_t n_subport_pipe_queues =
                RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

        uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
        uint32_t size_queue =
                n_subport_pipe_queues * sizeof(struct rte_sched_queue);
        uint32_t size_queue_extra
                = n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
        uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
                sizeof(struct rte_sched_pipe_profile);
        uint32_t size_bmp_array =
                rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        uint32_t size_per_pipe_queue_array, size_queue_array;

        uint32_t base, i;

        size_per_pipe_queue_array = 0;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
                        size_per_pipe_queue_array +=
                                params->qsize[i] * sizeof(struct rte_mbuf *);
                else
                        size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
                                params->qsize[i] * sizeof(struct rte_mbuf *);
        }
        size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

        base = 0;

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

        return base;
}
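
/* Sketch of the layout computed above: the subport's trailing memory[]
 * blob is carved into consecutive cache-line-aligned regions, in enum
 * order:
 *
 *   [pipe] [queue] [queue_extra] [pipe_profiles] [bmp_array] [queue_array]
 *
 * Passing e_RTE_SCHED_SUBPORT_ARRAY_TOTAL falls through every region,
 * so the returned base is the total memory footprint.
 */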

static void
rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
{
        uint32_t i;

        subport->qsize_add[0] = 0;

        /* Strict priority traffic class */
        for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];

        /* Best-effort traffic class */
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];

        subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
}
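
/* Worked example (hypothetical sizes): with 12 strict priority queues of
 * size 64 and four best-effort queues of size 256, qsize_add[] holds each
 * queue's element offset inside one pipe's slice of queue_array:
 * {0, 64, ..., 704, 768, 1024, 1280, 1536}, and qsize_sum = 1792
 * elements per pipe.
 */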

static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
        struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

        RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u,\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best-effort traffic class oversubscription: weight = %hhu\n"
                "       WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
                i,

                /* Token bucket */
                p->tb_period,
                p->tb_credits_per_period,
                p->tb_size,

                /* Traffic classes */
                p->tc_period,
                p->tc_credits_per_period[0],
                p->tc_credits_per_period[1],
                p->tc_credits_per_period[2],
                p->tc_credits_per_period[3],
                p->tc_credits_per_period[4],
                p->tc_credits_per_period[5],
                p->tc_credits_per_period[6],
                p->tc_credits_per_period[7],
                p->tc_credits_per_period[8],
                p->tc_credits_per_period[9],
                p->tc_credits_per_period[10],
                p->tc_credits_per_period[11],
                p->tc_credits_per_period[12],

                /* Best-effort traffic class oversubscription */
                p->tc_ov_weight,

                /* WRR */
                p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}

static inline uint64_t
rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
{
        uint64_t time = time_ms;

        time = (time * rate) / 1000;

        return time;
}
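
/* Example (hypothetical values): at rate = 1,250,000,000 bytes/sec
 * (10 GbE) and time_ms = 10, this returns 12,500,000, i.e. one period
 * worth of credits corresponds to 12.5 MB of line time.
 */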

static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
        struct rte_sched_pipe_params *src,
        struct rte_sched_pipe_profile *dst,
        uint32_t rate)
{
        uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint32_t lcd1, lcd2, lcd;
        uint32_t i;

        /* Token Bucket */
        if (src->tb_rate == rate) {
                dst->tb_credits_per_period = 1;
                dst->tb_period = 1;
        } else {
                double tb_rate = (double) src->tb_rate
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d,
                        &dst->tb_credits_per_period, &dst->tb_period);
        }

        dst->tb_size = src->tb_size;

        /* Traffic Classes */
        dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
                                                rate);

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (subport->qsize[i])
                        dst->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(src->tc_period,
                                        src->tc_rate[i]);

        dst->tc_ov_weight = src->tc_ov_weight;

        /* WRR queues */
        wrr_cost[0] = src->wrr_weights[0];
        wrr_cost[1] = src->wrr_weights[1];
        wrr_cost[2] = src->wrr_weights[2];
        wrr_cost[3] = src->wrr_weights[3];

        lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
        lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
        lcd = rte_get_lcd(lcd1, lcd2);

        wrr_cost[0] = lcd / wrr_cost[0];
        wrr_cost[1] = lcd / wrr_cost[1];
        wrr_cost[2] = lcd / wrr_cost[2];
        wrr_cost[3] = lcd / wrr_cost[3];

        dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
        dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
        dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
        dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}
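
/* Worked example (hypothetical weights): for wrr_weights = {1, 2, 4, 8},
 * rte_get_lcd() gives lcd1 = 2, lcd2 = 8, lcd = 8, so
 * wrr_cost = {8, 4, 2, 1}. Cost is the reciprocal of weight: the
 * lowest-cost (highest-weight) queue is served most often, and dividing
 * the common multiple by each weight keeps every cost an exact integer.
 */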

static void
rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
        struct rte_sched_subport_params *params, uint32_t rate)
{
        uint32_t i;

        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;

                rte_sched_pipe_profile_convert(subport, src, dst, rate);
                rte_sched_port_log_pipe_profile(subport, i);
        }

        subport->pipe_tc_be_rate_max = 0;
        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

                if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
                        subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
        }
}

static int
rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        uint32_t n_max_pipes_per_subport,
        uint32_t rate)
{
        uint32_t i;

        /* Check user parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        if (params->tb_rate == 0 || params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* qsize: if non-zero, power of 2,
         * no bigger than 32K (due to 16-bit read/write pointers)
         */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint16_t qsize = params->qsize[i];

                if (qsize != 0 && !rte_is_power_of_2(qsize)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize\n", __func__);
                        return -EINVAL;
                }
        }

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t tc_rate = params->tc_rate[i];
                uint16_t qsize = params->qsize[i];

                if ((qsize == 0 && tc_rate != 0) ||
                        (qsize != 0 && tc_rate == 0) ||
                        (tc_rate > params->tb_rate)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for tc rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect qsize or tc rate(best effort)\n", __func__);
                return -EINVAL;
        }

        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport_enabled == 0 ||
                params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
            !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipes number\n", __func__);
                return -EINVAL;
        }

        /* pipe_profiles and n_pipe_profiles */
        if (params->pipe_profiles == NULL ||
            params->n_pipe_profiles == 0 ||
                params->n_max_pipe_profiles == 0 ||
                params->n_pipe_profiles > params->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipe profiles\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < params->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *p = params->pipe_profiles + i;
                int status;

                status = pipe_profile_check(p, rate, &params->qsize[0]);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile check failed(%d)\n", __func__, status);
                        return -EINVAL;
                }
        }

        return 0;
}

uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
        struct rte_sched_subport_params **subport_params)
{
        uint32_t size0 = 0, size1 = 0, i;
        int status;

        status = rte_sched_port_check_params(port_params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler port params check failed (%d)\n",
                        __func__, status);

                return 0;
        }

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                status = rte_sched_subport_check_params(sp,
                                port_params->n_pipes_per_subport,
                                port_params->rate);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Port scheduler subport params check failed (%d)\n",
                                __func__, status);

                        return 0;
                }
        }

        size0 = sizeof(struct rte_sched_port);

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                size1 += rte_sched_subport_get_array_base(sp,
                                        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
        }

        return size0 + size1;
}

struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
        struct rte_sched_port *port = NULL;
        uint32_t size0, size1;
        uint32_t cycles_per_byte;
        uint32_t i, j;
        int status;

        status = rte_sched_port_check_params(params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);
                return NULL;
        }

        size0 = sizeof(struct rte_sched_port);
        size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);

        /* Allocate memory to store the data structures */
        port = rte_zmalloc_socket("qos_params", size0 + size1, RTE_CACHE_LINE_SIZE,
                params->socket);
        if (port == NULL) {
                RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

                return NULL;
        }

        /* User parameters */
        port->n_subports_per_port = params->n_subports_per_port;
        port->n_pipes_per_subport = params->n_pipes_per_subport;
        port->n_pipes_per_subport_log2 =
                        __builtin_ctz(params->n_pipes_per_subport);
        port->socket = params->socket;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                port->pipe_queue[i] = i;

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->pipe_tc[i] = j;

                if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->tc_queue[i] = j;

                if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }
        port->rate = params->rate;
        port->mtu = params->mtu + params->frame_overhead;
        port->frame_overhead = params->frame_overhead;

        /* Timing */
        port->time_cpu_cycles = rte_get_tsc_cycles();
        port->time_cpu_bytes = 0;
        port->time = 0;

        cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
                / params->rate;
        port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);

        /* Grinders */
        port->pkts_out = NULL;
        port->n_pkts_out = 0;
        port->subport_id = 0;

        return port;
}
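
/* Typical bring-up sequence (sketch only; the parameter structures and
 * loop bounds are hypothetical, the function names are the ones defined
 * in this file):
 *
 *   struct rte_sched_port *port = rte_sched_port_config(&port_params);
 *   for (i = 0; i < port_params.n_subports_per_port; i++)
 *           rte_sched_subport_config(port, i, subport_params[i]);
 *   for (pipe = 0; pipe < n_active_pipes; pipe++)
 *           rte_sched_pipe_config(port, subport_id, pipe, profile_id);
 */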

static inline void
rte_sched_subport_free(struct rte_sched_port *port,
        struct rte_sched_subport *subport)
{
        uint32_t n_subport_pipe_queues;
        uint32_t qindex;

        if (subport == NULL)
                return;

        n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);

        /* Free enqueued mbufs */
        for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
                struct rte_mbuf **mbufs =
                        rte_sched_subport_pipe_qbase(subport, qindex);
                uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
                if (qsize != 0) {
                        struct rte_sched_queue *queue = subport->queue + qindex;
                        uint16_t qr = queue->qr & (qsize - 1);
                        uint16_t qw = queue->qw & (qsize - 1);

                        for (; qr != qw; qr = (qr + 1) & (qsize - 1))
                                rte_pktmbuf_free(mbufs[qr]);
                }
        }

        rte_bitmap_free(subport->bmp);
}

void
rte_sched_port_free(struct rte_sched_port *port)
{
        uint32_t i;

        /* Check user parameters */
        if (port == NULL)
                return;

        for (i = 0; i < port->n_subports_per_port; i++)
                rte_sched_subport_free(port, port->subports[i]);

        rte_free(port);
}

static void
rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
{
        struct rte_sched_subport *s = port->subports[i];

        RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best effort traffic class oversubscription: wm min = %u, wm max = %u\n",
                i,

                /* Token bucket */
                s->tb_period,
                s->tb_credits_per_period,
                s->tb_size,

                /* Traffic classes */
                s->tc_period,
                s->tc_credits_per_period[0],
                s->tc_credits_per_period[1],
                s->tc_credits_per_period[2],
                s->tc_credits_per_period[3],
                s->tc_credits_per_period[4],
                s->tc_credits_per_period[5],
                s->tc_credits_per_period[6],
                s->tc_credits_per_period[7],
                s->tc_credits_per_period[8],
                s->tc_credits_per_period[9],
                s->tc_credits_per_period[10],
                s->tc_credits_per_period[11],
                s->tc_credits_per_period[12],

                /* Best effort traffic class oversubscription */
                s->tc_ov_wm_min,
                s->tc_ov_wm_max);
}

static void
rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
{
        uint32_t i;

        for (i = 0; i < n_subports; i++) {
                struct rte_sched_subport *subport = port->subports[i];

                rte_sched_subport_free(port, subport);
        }

        rte_free(port);
}

int
rte_sched_subport_config(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_params *params)
{
        struct rte_sched_subport *s = NULL;
        uint32_t n_subports = subport_id;
        uint32_t n_subport_pipe_queues, i;
        uint32_t size0, size1, bmp_mem_size;
        int status;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        status = rte_sched_subport_check_params(params,
                port->n_pipes_per_subport,
                port->rate);
        if (status != 0) {
                RTE_LOG(NOTICE, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Determine the amount of memory to allocate */
        size0 = sizeof(struct rte_sched_subport);
        size1 = rte_sched_subport_get_array_base(params,
                                e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);

        /* Allocate memory to store the data structures */
        s = rte_zmalloc_socket("subport_params", size0 + size1,
                RTE_CACHE_LINE_SIZE, port->socket);
        if (s == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Memory allocation fails\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -ENOMEM;
        }

        n_subports++;

        /* Port */
        port->subports[subport_id] = s;

        /* Token Bucket (TB) */
        if (params->tb_rate == port->rate) {
                s->tb_credits_per_period = 1;
                s->tb_period = 1;
        } else {
                double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
        }

        s->tb_size = params->tb_size;
        s->tb_time = port->time;
        s->tb_credits = s->tb_size / 2;

        /* Traffic Classes (TCs) */
        s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (params->qsize[i])
                        s->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(params->tc_period,
                                        params->tc_rate[i]);
        }
        s->tc_time = port->time + s->tc_period;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (params->qsize[i])
                        s->tc_credits[i] = s->tc_credits_per_period[i];

        /* compile time checks */
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
                (RTE_SCHED_PORT_N_GRINDERS - 1));

        /* User parameters */
        s->n_pipes_per_subport_enabled = params->n_pipes_per_subport_enabled;
        memcpy(s->qsize, params->qsize, sizeof(params->qsize));
        s->n_pipe_profiles = params->n_pipe_profiles;
        s->n_max_pipe_profiles = params->n_max_pipe_profiles;

#ifdef RTE_SCHED_RED
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t j;

                for (j = 0; j < RTE_COLORS; j++) {
                        /* if min/max are both zero, then RED is disabled */
                        if ((params->red_params[i][j].min_th |
                             params->red_params[i][j].max_th) == 0) {
                                continue;
                        }

                        if (rte_red_config_init(&s->red_config[i][j],
                                params->red_params[i][j].wq_log2,
                                params->red_params[i][j].min_th,
                                params->red_params[i][j].max_th,
                                params->red_params[i][j].maxp_inv) != 0) {
                                rte_sched_free_memory(port, n_subports);

                                RTE_LOG(NOTICE, SCHED,
                                "%s: RED configuration init fails\n", __func__);
                                return -EINVAL;
                        }
                }
        }
#endif

        /* Scheduling loop detection */
        s->pipe_loop = RTE_SCHED_PIPE_INVALID;
        s->pipe_exhaustion = 0;

        /* Grinders */
        s->busy_grinders = 0;

        /* Queue base calculation */
        rte_sched_subport_config_qsize(s);

        /* Large data structures */
        s->pipe = (struct rte_sched_pipe *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
        s->queue = (struct rte_sched_queue *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
        s->queue_extra = (struct rte_sched_queue_extra *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
        s->pipe_profiles = (struct rte_sched_pipe_profile *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
        s->bmp_array = s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
        s->queue_array = (struct rte_mbuf **)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));

        /* Pipe profile table */
        rte_sched_subport_config_pipe_profile_table(s, params, port->rate);

        /* Bitmap */
        n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
        bmp_mem_size = rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
                                bmp_mem_size);
        if (s->bmp == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Subport bitmap init error\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
                s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;

#ifdef RTE_SCHED_SUBPORT_TC_OV
        /* TC oversubscription */
        s->tc_ov_wm_min = port->mtu;
        s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period,
                                                     s->pipe_tc_be_rate_max);
        s->tc_ov_wm = s->tc_ov_wm_max;
        s->tc_ov_period_id = 0;
        s->tc_ov = 0;
        s->tc_ov_n = 0;
        s->tc_ov_rate = 0;
#endif

        rte_sched_port_log_subport_config(port, subport_id);

        return 0;
}

int
rte_sched_pipe_config(struct rte_sched_port *port,
        uint32_t subport_id,
        uint32_t pipe_id,
        int32_t pipe_profile)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe *p;
        struct rte_sched_pipe_profile *params;
        uint32_t n_subports = subport_id + 1;
        uint32_t deactivate, profile, i;

        /* Check user parameters */
        profile = (uint32_t) pipe_profile;
        deactivate = (pipe_profile < 0);

        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        s = port->subports[subport_id];
        if (pipe_id >= s->n_pipes_per_subport_enabled) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        if (!deactivate && profile >= s->n_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe profile\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Handle the case when pipe already has a valid configuration */
        p = s->pipe + pipe_id;
        if (p->tb_time) {
                params = s->pipe_profiles + p->profile;

                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                /* Unplug pipe from its subport */
                s->tc_ov_n -= params->tc_ov_weight;
                s->tc_ov_rate -= pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }

                /* Reset the pipe */
                memset(p, 0, sizeof(struct rte_sched_pipe));
        }

        if (deactivate)
                return 0;

        /* Apply the new pipe configuration */
        p->profile = profile;
        params = s->pipe_profiles + p->profile;

        /* Token Bucket (TB) */
        p->tb_time = port->time;
        p->tb_credits = params->tb_size / 2;

        /* Traffic Classes (TCs) */
        p->tc_time = port->time + params->tc_period;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (s->qsize[i])
                        p->tc_credits[i] = params->tc_credits_per_period[i];

        {
                /* Subport best effort tc oversubscription */
                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                s->tc_ov_n += params->tc_ov_weight;
                s->tc_ov_rate += pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }
                p->tc_ov_period_id = s->tc_ov_period_id;
                p->tc_ov_credits = s->tc_ov_wm;
        }

        return 0;
}

int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_pipe_params *params,
        uint32_t *pipe_profile_id)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe_profile *pp;
        uint32_t i;
        int status;

        /* Port */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        /* Subport id must not exceed the max limit */
        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Pipe profiles must not exceed the max limit */
        if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Number of pipe profiles exceeds the max limit\n", __func__);
                return -EINVAL;
        }

        /* Pipe params */
        status = pipe_profile_check(params, port->rate, &s->qsize[0]);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Pipe profile check failed(%d)\n", __func__, status);
                return -EINVAL;
        }

        pp = &s->pipe_profiles[s->n_pipe_profiles];
        rte_sched_pipe_profile_convert(s, params, pp, port->rate);

        /* Pipe profile must not already exist */
        for (i = 0; i < s->n_pipe_profiles; i++)
                if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile exists\n", __func__);
                        return -EINVAL;
                }

        /* Pipe profile commit */
        *pipe_profile_id = s->n_pipe_profiles;
        s->n_pipe_profiles++;

        if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
                s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

        rte_sched_port_log_pipe_profile(s, *pipe_profile_id);

        return 0;
}

static inline uint32_t
rte_sched_port_qindex(struct rte_sched_port *port,
        uint32_t subport,
        uint32_t pipe,
        uint32_t traffic_class,
        uint32_t queue)
{
        return ((subport & (port->n_subports_per_port - 1)) <<
                (port->n_pipes_per_subport_log2 + 4)) |
                ((pipe &
                (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
                ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
                (RTE_SCHED_QUEUES_PER_PIPE - 1));
}
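
/* Illustration of the queue index bit layout built above:
 *
 *   | subport id | pipe id (n_pipes_per_subport_log2 bits) | queue (4 bits) |
 *
 * The low 4 bits select one of the 16 queues of a pipe; for a strict
 * priority traffic class the offset equals the TC index, while queues
 * 12-15 belong to the best-effort TC. Example, assuming 4096 pipes per
 * subport: subport 3, pipe 10, TC 12 (BE), queue 2 yields
 * (3 << 16) | (10 << 4) | 14.
 */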

void
rte_sched_port_pkt_write(struct rte_sched_port *port,
                         struct rte_mbuf *pkt,
                         uint32_t subport, uint32_t pipe,
                         uint32_t traffic_class,
                         uint32_t queue, enum rte_color color)
{
        uint32_t queue_id =
                rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);

        rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
}

void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
                                  const struct rte_mbuf *pkt,
                                  uint32_t *subport, uint32_t *pipe,
                                  uint32_t *traffic_class, uint32_t *queue)
{
        uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);

        *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
        *pipe = (queue_id >> 4) &
                (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
        *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
        *queue = rte_sched_port_tc_queue(port, queue_id);
}

enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
{
        return (enum rte_color)rte_mbuf_sched_color_get(pkt);
}

int
rte_sched_subport_read_stats(struct rte_sched_port *port,
                             uint32_t subport_id,
                             struct rte_sched_subport_stats *stats,
                             uint32_t *tc_ov)
{
        struct rte_sched_subport *s;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        if (stats == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter stats\n", __func__);
                return -EINVAL;
        }

        if (tc_ov == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc_ov\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Copy subport stats and clear */
        memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
        memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));

        /* Subport TC oversubscription status */
        *tc_ov = s->tc_ov;

        return 0;
}

int
rte_sched_queue_read_stats(struct rte_sched_port *port,
        uint32_t queue_id,
        struct rte_sched_queue_stats *stats,
        uint16_t *qlen)
{
        struct rte_sched_subport *s;
        struct rte_sched_queue *q;
        struct rte_sched_queue_extra *qe;
        uint32_t subport_id, subport_qmask, subport_qindex;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (queue_id >= rte_sched_port_queues_per_port(port)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for queue id\n", __func__);
                return -EINVAL;
        }

        if (stats == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter stats\n", __func__);
                return -EINVAL;
        }

        if (qlen == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter qlen\n", __func__);
                return -EINVAL;
        }
        subport_qmask = port->n_pipes_per_subport_log2 + 4;
        subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1);

        s = port->subports[subport_id];
        subport_qindex = ((1 << subport_qmask) - 1) & queue_id;
        q = s->queue + subport_qindex;
        qe = s->queue_extra + subport_qindex;

        /* Copy queue stats and clear */
        memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
        memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));

        /* Queue length */
        *qlen = q->qw - q->qr;

        return 0;
}
1478
1479 #ifdef RTE_SCHED_DEBUG
1480
1481 static inline int
1482 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1483         uint32_t qindex)
1484 {
1485         struct rte_sched_queue *queue = subport->queue + qindex;
1486
1487         return queue->qr == queue->qw;
1488 }
1489
1490 #endif /* RTE_SCHED_DEBUG */
1491
1492 #ifdef RTE_SCHED_COLLECT_STATS
1493
1494 static inline void
1495 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1496         struct rte_sched_subport *subport,
1497         uint32_t qindex,
1498         struct rte_mbuf *pkt)
1499 {
1500         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1501         uint32_t pkt_len = pkt->pkt_len;
1502
1503         subport->stats.n_pkts_tc[tc_index] += 1;
1504         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1505 }
1506
1507 #ifdef RTE_SCHED_RED
1508 static inline void
1509 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1510         struct rte_sched_subport *subport,
1511         uint32_t qindex,
1512         struct rte_mbuf *pkt,
1513         uint32_t red)
1514 #else
1515 static inline void
1516 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1517         struct rte_sched_subport *subport,
1518         uint32_t qindex,
1519         struct rte_mbuf *pkt,
1520         __rte_unused uint32_t red)
1521 #endif
1522 {
1523         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1524         uint32_t pkt_len = pkt->pkt_len;
1525
1526         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1527         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1528 #ifdef RTE_SCHED_RED
1529         subport->stats.n_pkts_red_dropped[tc_index] += red;
1530 #endif
1531 }
1532
1533 static inline void
1534 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1535         uint32_t qindex,
1536         struct rte_mbuf *pkt)
1537 {
1538         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1539         uint32_t pkt_len = pkt->pkt_len;
1540
1541         qe->stats.n_pkts += 1;
1542         qe->stats.n_bytes += pkt_len;
1543 }
1544
1545 #ifdef RTE_SCHED_RED
1546 static inline void
1547 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1548         uint32_t qindex,
1549         struct rte_mbuf *pkt,
1550         uint32_t red)
1551 #else
1552 static inline void
1553 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1554         uint32_t qindex,
1555         struct rte_mbuf *pkt,
1556         __rte_unused uint32_t red)
1557 #endif
1558 {
1559         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1560         uint32_t pkt_len = pkt->pkt_len;
1561
1562         qe->stats.n_pkts_dropped += 1;
1563         qe->stats.n_bytes_dropped += pkt_len;
1564 #ifdef RTE_SCHED_RED
1565         qe->stats.n_pkts_red_dropped += red;
1566 #endif
1567 }
1568
1569 #endif /* RTE_SCHED_COLLECT_STATS */
1570
1571 #ifdef RTE_SCHED_RED
1572
1573 static inline int
1574 rte_sched_port_red_drop(struct rte_sched_port *port,
1575         struct rte_sched_subport *subport,
1576         struct rte_mbuf *pkt,
1577         uint32_t qindex,
1578         uint16_t qlen)
1579 {
1580         struct rte_sched_queue_extra *qe;
1581         struct rte_red_config *red_cfg;
1582         struct rte_red *red;
1583         uint32_t tc_index;
1584         enum rte_color color;
1585
1586         tc_index = rte_sched_port_pipe_tc(port, qindex);
1587         color = rte_sched_port_pkt_read_color(pkt);
1588         red_cfg = &subport->red_config[tc_index][color];
1589
1590         if ((red_cfg->min_th | red_cfg->max_th) == 0)
1591                 return 0;
1592
1593         qe = subport->queue_extra + qindex;
1594         red = &qe->red;
1595
1596         return rte_red_enqueue(red_cfg, red, qlen, port->time);
1597 }
1598
1599 static inline void
1600 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
1601         struct rte_sched_subport *subport, uint32_t qindex)
1602 {
1603         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1604         struct rte_red *red = &qe->red;
1605
1606         rte_red_mark_queue_empty(red, port->time);
1607 }
1608
1609 #else
1610
1611 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
1612         struct rte_sched_subport *subport __rte_unused,
1613         struct rte_mbuf *pkt __rte_unused,
1614         uint32_t qindex __rte_unused,
1615         uint16_t qlen __rte_unused)
1616 {
1617         return 0;
1618 }
1619
1620 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
1621
1622 #endif /* RTE_SCHED_RED */
1623
1624 #ifdef RTE_SCHED_DEBUG
1625
1626 static inline void
1627 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1628                        uint64_t bmp_slab)
1629 {
1630         uint64_t mask;
1631         uint32_t i, panic;
1632
1633         if (bmp_slab == 0)
1634                 rte_panic("Empty slab at position %u\n", bmp_pos);
1635
1636         panic = 0;
1637         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1638                 if (mask & bmp_slab) {
1639                         if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1640                                 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1641                                 panic = 1;
1642                         }
1643                 }
1644         }
1645
1646         if (panic)
1647                 rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1648                         bmp_slab, bmp_pos);
1649 }
1650
1651 #endif /* RTE_SCHED_DEBUG */
1652
1653 static inline struct rte_sched_subport *
1654 rte_sched_port_subport(struct rte_sched_port *port,
1655         struct rte_mbuf *pkt)
1656 {
1657         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1658         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1659
1660         return port->subports[subport_id];
1661 }
1662
1663 static inline uint32_t
1664 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1665         struct rte_mbuf *pkt, uint32_t subport_qmask)
1666 {
1667         struct rte_sched_queue *q;
1668 #ifdef RTE_SCHED_COLLECT_STATS
1669         struct rte_sched_queue_extra *qe;
1670 #endif
1671         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1672         uint32_t subport_queue_id = subport_qmask & qindex;
1673
1674         q = subport->queue + subport_queue_id;
1675         rte_prefetch0(q);
1676 #ifdef RTE_SCHED_COLLECT_STATS
1677         qe = subport->queue_extra + subport_queue_id;
1678         rte_prefetch0(qe);
1679 #endif
1680
1681         return subport_queue_id;
1682 }
1683
1684 static inline void
1685 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
1686         struct rte_sched_subport *subport,
1687         uint32_t qindex,
1688         struct rte_mbuf **qbase)
1689 {
1690         struct rte_sched_queue *q;
1691         struct rte_mbuf **q_qw;
1692         uint16_t qsize;
1693
1694         q = subport->queue + qindex;
1695         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1696         q_qw = qbase + (q->qw & (qsize - 1));
1697
1698         rte_prefetch0(q_qw);
1699         rte_bitmap_prefetch0(subport->bmp, qindex);
1700 }
1701
1702 static inline int
1703 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
1704         struct rte_sched_subport *subport,
1705         uint32_t qindex,
1706         struct rte_mbuf **qbase,
1707         struct rte_mbuf *pkt)
1708 {
1709         struct rte_sched_queue *q;
1710         uint16_t qsize;
1711         uint16_t qlen;
1712
1713         q = subport->queue + qindex;
1714         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1715         qlen = q->qw - q->qr;
1716
1717         /* Drop the packet (and update drop stats) when the queue is full or RED drops it */
1718         if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
1719                      (qlen >= qsize))) {
1720                 rte_pktmbuf_free(pkt);
1721 #ifdef RTE_SCHED_COLLECT_STATS
1722                 rte_sched_port_update_subport_stats_on_drop(port, subport,
1723                         qindex, pkt, qlen < qsize);
1724                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
1725                         qlen < qsize);
1726 #endif
1727                 return 0;
1728         }
1729
1730         /* Enqueue packet */
1731         qbase[q->qw & (qsize - 1)] = pkt;
1732         q->qw++;
1733
1734         /* Activate queue in the subport bitmap */
1735         rte_bitmap_set(subport->bmp, qindex);
1736
1737         /* Statistics */
1738 #ifdef RTE_SCHED_COLLECT_STATS
1739         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
1740         rte_sched_port_update_queue_stats(subport, qindex, pkt);
1741 #endif
1742
1743         return 1;
1744 }
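/*
 * The occupancy bookkeeping above relies on qsize being a power of two:
 * (q->qw & (qsize - 1)) wraps the write index into the ring, and since
 * qw/qr are monotonically increasing uint16_t counters, the length
 * (q->qw - q->qr) stays correct across wraparound thanks to modular
 * unsigned arithmetic (e.g. qw = 3, qr = 65534 yields a length of 5).
 * Note also that the (qlen < qsize) argument passed to the drop-stats
 * helpers distinguishes RED drops (queue not yet full) from tail drops.
 */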
1745
1746
1747 /*
1748  * The enqueue function implements a 4-level pipeline with each stage
1749  * processing two different packets. The purpose of using a pipeline
1750  * is to hide the latency of prefetching the data structures. The
1751  * naming convention is presented in the diagram below:
1752  *
1753  *   p00  _______   p10  _______   p20  _______   p30  _______
1754  * ----->|       |----->|       |----->|       |----->|       |----->
1755  *       |   0   |      |   1   |      |   2   |      |   3   |
1756  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
1757  *   p01            p11            p21            p31
1758  *
1759  */
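/*
 * In steady state each loop iteration below touches eight packets: two
 * enter stage 0 (mbuf prefetch), two have their subport and queue
 * structures prefetched (stage 1), two have their queue write location
 * prefetched (stage 2), and two are written to their queues (stage 3).
 * By the time a packet reaches stage 3, every structure it needs was
 * prefetched in an earlier iteration, hiding the memory latency.
 */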
1760 int
1761 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
1762                        uint32_t n_pkts)
1763 {
1764         struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
1765                 *pkt30, *pkt31, *pkt_last;
1766         struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
1767                 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
1768         struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
1769                 *subport20, *subport21, *subport30, *subport31, *subport_last;
1770         uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
1771         uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
1772         uint32_t subport_qmask;
1773         uint32_t result, i;
1774
1775         result = 0;
1776         subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
1777
1778         /*
1779          * Less than 6 input packets available, which is not enough to
1780          * feed the pipeline
1781          */
1782         if (unlikely(n_pkts < 6)) {
1783                 struct rte_sched_subport *subports[5];
1784                 struct rte_mbuf **q_base[5];
1785                 uint32_t q[5];
1786
1787                 /* Prefetch the mbuf structure of each packet */
1788                 for (i = 0; i < n_pkts; i++)
1789                         rte_prefetch0(pkts[i]);
1790
1791                 /* Prefetch the subport structure for each packet */
1792                 for (i = 0; i < n_pkts; i++)
1793                         subports[i] = rte_sched_port_subport(port, pkts[i]);
1794
1795                 /* Prefetch the queue structure for each queue */
1796                 for (i = 0; i < n_pkts; i++)
1797                         q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
1798                                         pkts[i], subport_qmask);
1799
1800                 /* Prefetch the write pointer location of each queue */
1801                 for (i = 0; i < n_pkts; i++) {
1802                         q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
1803                         rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
1804                                 q[i], q_base[i]);
1805                 }
1806
1807                 /* Write each packet to its queue */
1808                 for (i = 0; i < n_pkts; i++)
1809                         result += rte_sched_port_enqueue_qwa(port, subports[i],
1810                                                 q[i], q_base[i], pkts[i]);
1811
1812                 return result;
1813         }
1814
1815         /* Feed the first 3 stages of the pipeline (6 packets needed) */
1816         pkt20 = pkts[0];
1817         pkt21 = pkts[1];
1818         rte_prefetch0(pkt20);
1819         rte_prefetch0(pkt21);
1820
1821         pkt10 = pkts[2];
1822         pkt11 = pkts[3];
1823         rte_prefetch0(pkt10);
1824         rte_prefetch0(pkt11);
1825
1826         subport20 = rte_sched_port_subport(port, pkt20);
1827         subport21 = rte_sched_port_subport(port, pkt21);
1828         q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
1829                         pkt20, subport_qmask);
1830         q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
1831                         pkt21, subport_qmask);
1832
1833         pkt00 = pkts[4];
1834         pkt01 = pkts[5];
1835         rte_prefetch0(pkt00);
1836         rte_prefetch0(pkt01);
1837
1838         subport10 = rte_sched_port_subport(port, pkt10);
1839         subport11 = rte_sched_port_subport(port, pkt11);
1840         q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1841                         pkt10, subport_qmask);
1842         q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1843                         pkt11, subport_qmask);
1844
1845         q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1846         q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1847         rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1848         rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1849
1850         /* Run the pipeline */
1851         for (i = 6; i < (n_pkts & (~1)); i += 2) {
1852                 /* Propagate stage inputs */
1853                 pkt30 = pkt20;
1854                 pkt31 = pkt21;
1855                 pkt20 = pkt10;
1856                 pkt21 = pkt11;
1857                 pkt10 = pkt00;
1858                 pkt11 = pkt01;
1859                 q30 = q20;
1860                 q31 = q21;
1861                 q20 = q10;
1862                 q21 = q11;
1863                 subport30 = subport20;
1864                 subport31 = subport21;
1865                 subport20 = subport10;
1866                 subport21 = subport11;
1867                 q30_base = q20_base;
1868                 q31_base = q21_base;
1869
1870                 /* Stage 0: Get packets in */
1871                 pkt00 = pkts[i];
1872                 pkt01 = pkts[i + 1];
1873                 rte_prefetch0(pkt00);
1874                 rte_prefetch0(pkt01);
1875
1876                 /* Stage 1: Prefetch subport and queue structure storing queue pointers */
1877                 subport10 = rte_sched_port_subport(port, pkt10);
1878                 subport11 = rte_sched_port_subport(port, pkt11);
1879                 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1880                                 pkt10, subport_qmask);
1881                 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1882                                 pkt11, subport_qmask);
1883
1884                 /* Stage 2: Prefetch queue write location */
1885                 q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1886                 q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1887                 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1888                 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1889
1890                 /* Stage 3: Write packet to queue and activate queue */
1891                 r30 = rte_sched_port_enqueue_qwa(port, subport30,
1892                                 q30, q30_base, pkt30);
1893                 r31 = rte_sched_port_enqueue_qwa(port, subport31,
1894                                 q31, q31_base, pkt31);
1895                 result += r30 + r31;
1896         }
1897
1898         /*
1899          * Drain the pipeline (exactly 6 packets).
1900          * Handle the last packet in the case
1901          * of an odd number of input packets.
1902          */
1903         pkt_last = pkts[n_pkts - 1];
1904         rte_prefetch0(pkt_last);
1905
1906         subport00 = rte_sched_port_subport(port, pkt00);
1907         subport01 = rte_sched_port_subport(port, pkt01);
1908         q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
1909                         pkt00, subport_qmask);
1910         q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
1911                         pkt01, subport_qmask);
1912
1913         q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
1914         q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
1915         rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
1916         rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
1917
1918         r20 = rte_sched_port_enqueue_qwa(port, subport20,
1919                         q20, q20_base, pkt20);
1920         r21 = rte_sched_port_enqueue_qwa(port, subport21,
1921                         q21, q21_base, pkt21);
1922         result += r20 + r21;
1923
1924         subport_last = rte_sched_port_subport(port, pkt_last);
1925         q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
1926                                 pkt_last, subport_qmask);
1927
1928         q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
1929         q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
1930         rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
1931         rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
1932
1933         r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
1934                         q10_base, pkt10);
1935         r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
1936                         q11_base, pkt11);
1937         result += r10 + r11;
1938
1939         q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
1940         rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
1941                 q_last, q_last_base);
1942
1943         r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
1944                         q00_base, pkt00);
1945         r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
1946                         q01_base, pkt01);
1947         result += r00 + r01;
1948
1949         if (n_pkts & 1) {
1950                 r_last = rte_sched_port_enqueue_qwa(port, subport_last,
1951                                         q_last, q_last_base, pkt_last);
1952                 result += r_last;
1953         }
1954
1955         return result;
1956 }
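/*
 * Illustrative call pattern (a minimal sketch; port configuration and
 * the classification step that stamps each mbuf's scheduler queue, e.g.
 * via rte_sched_port_pkt_write(), are assumed to happen elsewhere):
 *
 *	struct rte_mbuf *burst[32];
 *	uint32_t n_rx, n_enq;
 *
 *	n_rx = 32;	// packets received and classified upstream
 *	n_enq = rte_sched_port_enqueue(port, burst, n_rx);
 *	// n_rx - n_enq packets were dropped (queue full or RED)
 */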
1957
1958 #ifndef RTE_SCHED_SUBPORT_TC_OV
1959
1960 static inline void
1961 grinder_credits_update(struct rte_sched_port *port,
1962         struct rte_sched_subport *subport, uint32_t pos)
1963 {
1964         struct rte_sched_grinder *grinder = subport->grinder + pos;
1965         struct rte_sched_pipe *pipe = grinder->pipe;
1966         struct rte_sched_pipe_profile *params = grinder->pipe_params;
1967         uint64_t n_periods;
1968         uint32_t i;
1969
1970         /* Subport TB */
1971         n_periods = (port->time - subport->tb_time) / subport->tb_period;
1972         subport->tb_credits += n_periods * subport->tb_credits_per_period;
1973         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
1974         subport->tb_time += n_periods * subport->tb_period;
1975
1976         /* Pipe TB */
1977         n_periods = (port->time - pipe->tb_time) / params->tb_period;
1978         pipe->tb_credits += n_periods * params->tb_credits_per_period;
1979         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
1980         pipe->tb_time += n_periods * params->tb_period;
1981
1982         /* Subport TCs */
1983         if (unlikely(port->time >= subport->tc_time)) {
1984                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1985                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
1986
1987                 subport->tc_time = port->time + subport->tc_period;
1988         }
1989
1990         /* Pipe TCs */
1991         if (unlikely(port->time >= pipe->tc_time)) {
1992                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1993                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
1994
1995                 pipe->tc_time = port->time + params->tc_period;
1996         }
1997 }
1998
1999 #else
2000
2001 static inline uint32_t
2002 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2003         struct rte_sched_subport *subport)
2004 {
2005         uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2006         uint32_t tc_consumption = 0, tc_ov_consumption_max;
2007         uint32_t tc_ov_wm = subport->tc_ov_wm;
2008         uint32_t i;
2009
2010         if (subport->tc_ov == 0)
2011                 return subport->tc_ov_wm_max;
2012
2013         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2014                 tc_ov_consumption[i] =
2015                         subport->tc_credits_per_period[i] - subport->tc_credits[i];
2016                 tc_consumption += tc_ov_consumption[i];
2017         }
2018
2019         tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2020                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2021                 subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2022
2023         tc_ov_consumption_max =
2024                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2025                         tc_consumption;
2026
2027         if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2028                 (tc_ov_consumption_max - port->mtu)) {
2029                 tc_ov_wm  -= tc_ov_wm >> 7;
2030                 if (tc_ov_wm < subport->tc_ov_wm_min)
2031                         tc_ov_wm = subport->tc_ov_wm_min;
2032
2033                 return tc_ov_wm;
2034         }
2035
2036         tc_ov_wm += (tc_ov_wm >> 7) + 1;
2037         if (tc_ov_wm > subport->tc_ov_wm_max)
2038                 tc_ov_wm = subport->tc_ov_wm_max;
2039
2040         return tc_ov_wm;
2041 }
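/*
 * The watermark update above scales tc_ov_wm by roughly a factor of
 * 1 -/+ 1/128 per tc_period: when best-effort consumption gets within
 * one MTU of the credits left unused by the higher traffic classes,
 * the watermark shrinks (tc_ov_wm -= tc_ov_wm >> 7), otherwise it
 * grows by (tc_ov_wm >> 7) + 1. E.g. tc_ov_wm = 12800 becomes 12700 on
 * congestion and 12901 when there is slack, clamped to the
 * [tc_ov_wm_min, tc_ov_wm_max] range.
 */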
2042
2043 static inline void
2044 grinder_credits_update(struct rte_sched_port *port,
2045         struct rte_sched_subport *subport, uint32_t pos)
2046 {
2047         struct rte_sched_grinder *grinder = subport->grinder + pos;
2048         struct rte_sched_pipe *pipe = grinder->pipe;
2049         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2050         uint64_t n_periods;
2051         uint32_t i;
2052
2053         /* Subport TB */
2054         n_periods = (port->time - subport->tb_time) / subport->tb_period;
2055         subport->tb_credits += n_periods * subport->tb_credits_per_period;
2056         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
2057         subport->tb_time += n_periods * subport->tb_period;
2058
2059         /* Pipe TB */
2060         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2061         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2062         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
2063         pipe->tb_time += n_periods * params->tb_period;
2064
2065         /* Subport TCs */
2066         if (unlikely(port->time >= subport->tc_time)) {
2067                 subport->tc_ov_wm = grinder_tc_ov_credits_update(port, subport);
2068
2069                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2070                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
2071
2072                 subport->tc_time = port->time + subport->tc_period;
2073                 subport->tc_ov_period_id++;
2074         }
2075
2076         /* Pipe TCs */
2077         if (unlikely(port->time >= pipe->tc_time)) {
2078                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2079                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2080                 pipe->tc_time = port->time + params->tc_period;
2081         }
2082
2083         /* Pipe TCs - Oversubscription */
2084         if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2085                 pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2086
2087                 pipe->tc_ov_period_id = subport->tc_ov_period_id;
2088         }
2089 }
2090
2091 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2092
2093
2094 #ifndef RTE_SCHED_SUBPORT_TC_OV
2095
2096 static inline int
2097 grinder_credits_check(struct rte_sched_port *port,
2098         struct rte_sched_subport *subport, uint32_t pos)
2099 {
2100         struct rte_sched_grinder *grinder = subport->grinder + pos;
2101         struct rte_sched_pipe *pipe = grinder->pipe;
2102         struct rte_mbuf *pkt = grinder->pkt;
2103         uint32_t tc_index = grinder->tc_index;
2104         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2105         uint32_t subport_tb_credits = subport->tb_credits;
2106         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2107         uint32_t pipe_tb_credits = pipe->tb_credits;
2108         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2109         int enough_credits;
2110
2111         /* Check pipe and subport credits */
2112         enough_credits = (pkt_len <= subport_tb_credits) &&
2113                 (pkt_len <= subport_tc_credits) &&
2114                 (pkt_len <= pipe_tb_credits) &&
2115                 (pkt_len <= pipe_tc_credits);
2116
2117         if (!enough_credits)
2118                 return 0;
2119
2120         /* Update pipe and subport credits */
2121         subport->tb_credits -= pkt_len;
2122         subport->tc_credits[tc_index] -= pkt_len;
2123         pipe->tb_credits -= pkt_len;
2124         pipe->tc_credits[tc_index] -= pkt_len;
2125
2126         return 1;
2127 }
2128
2129 #else
2130
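/*
 * The oversubscription-aware credit check below is branch-free:
 * pipe_tc_ov_mask1[] substitutes UINT32_MAX ("unlimited") for every
 * traffic class except best-effort, which makes the additional
 * (pkt_len <= pipe_tc_ov_credits) test a no-op for the strict-priority
 * classes, while pipe_tc_ov_mask2[] zeroes the amount subtracted from
 * tc_ov_credits for those same classes.
 */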
2131 static inline int
2132 grinder_credits_check(struct rte_sched_port *port,
2133         struct rte_sched_subport *subport, uint32_t pos)
2134 {
2135         struct rte_sched_grinder *grinder = subport->grinder + pos;
2136         struct rte_sched_pipe *pipe = grinder->pipe;
2137         struct rte_mbuf *pkt = grinder->pkt;
2138         uint32_t tc_index = grinder->tc_index;
2139         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2140         uint32_t subport_tb_credits = subport->tb_credits;
2141         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2142         uint32_t pipe_tb_credits = pipe->tb_credits;
2143         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2144         uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2145         uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2146         uint32_t pipe_tc_ov_credits, i;
2147         int enough_credits;
2148
2149         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2150                 pipe_tc_ov_mask1[i] = UINT32_MAX;
2151
2152         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2153         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX;
2154         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2155
2156         /* Check pipe and subport credits */
2157         enough_credits = (pkt_len <= subport_tb_credits) &&
2158                 (pkt_len <= subport_tc_credits) &&
2159                 (pkt_len <= pipe_tb_credits) &&
2160                 (pkt_len <= pipe_tc_credits) &&
2161                 (pkt_len <= pipe_tc_ov_credits);
2162
2163         if (!enough_credits)
2164                 return 0;
2165
2166         /* Update pipe and subport credits */
2167         subport->tb_credits -= pkt_len;
2168         subport->tc_credits[tc_index] -= pkt_len;
2169         pipe->tb_credits -= pkt_len;
2170         pipe->tc_credits[tc_index] -= pkt_len;
2171         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2172
2173         return 1;
2174 }
2175
2176 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2177
2178
2179 static inline int
2180 grinder_schedule(struct rte_sched_port *port,
2181         struct rte_sched_subport *subport, uint32_t pos)
2182 {
2183         struct rte_sched_grinder *grinder = subport->grinder + pos;
2184         struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2185         struct rte_mbuf *pkt = grinder->pkt;
2186         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2187         uint32_t be_tc_active;
2188
2189         if (!grinder_credits_check(port, subport, pos))
2190                 return 0;
2191
2192         /* Advance port time */
2193         port->time += pkt_len;
2194
2195         /* Send packet */
2196         port->pkts_out[port->n_pkts_out++] = pkt;
2197         queue->qr++;
2198
2199         be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2200         grinder->wrr_tokens[grinder->qpos] +=
2201                 (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2202
2203         if (queue->qr == queue->qw) {
2204                 uint32_t qindex = grinder->qindex[grinder->qpos];
2205
2206                 rte_bitmap_clear(subport->bmp, qindex);
2207                 grinder->qmask &= ~(1 << grinder->qpos);
2208                 if (be_tc_active)
2209                         grinder->wrr_mask[grinder->qpos] = 0;
2210                 rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
2211         }
2212
2213         /* Reset pipe loop detection */
2214         subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2215         grinder->productive = 1;
2216
2217         return 1;
2218 }
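/*
 * be_tc_active above is an all-ones/all-zeros mask rather than a
 * boolean: ANDing the WRR token increment with it turns the update
 * into straight-line code that only takes effect for the best-effort
 * traffic class, avoiding a branch on the scheduling fast path.
 */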
2219
2220 #ifdef SCHED_VECTOR_SSE4
2221
2222 static inline int
2223 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2224 {
2225         __m128i index = _mm_set1_epi32(base_pipe);
2226         __m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
2227         __m128i res = _mm_cmpeq_epi32(pipes, index);
2228
2229         pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
2230         pipes = _mm_cmpeq_epi32(pipes, index);
2231         res = _mm_or_si128(res, pipes);
2232
2233         if (_mm_testz_si128(res, res))
2234                 return 0;
2235
2236         return 1;
2237 }
2238
2239 #elif defined(SCHED_VECTOR_NEON)
2240
2241 static inline int
2242 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2243 {
2244         uint32x4_t index, pipes;
2245         uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;
2246
2247         index = vmovq_n_u32(base_pipe);
2248         pipes = vld1q_u32(pos);
2249         if (!vminvq_u32(veorq_u32(pipes, index)))
2250                 return 1;
2251
2252         pipes = vld1q_u32(pos + 4);
2253         if (!vminvq_u32(veorq_u32(pipes, index)))
2254                 return 1;
2255
2256         return 0;
2257 }
2258
2259 #else
2260
2261 static inline int
2262 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2263 {
2264         uint32_t i;
2265
2266         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2267                 if (subport->grinder_base_bmp_pos[i] == base_pipe)
2268                         return 1;
2269         }
2270
2271         return 0;
2272 }
2273
2274 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2275
2276 static inline void
2277 grinder_pcache_populate(struct rte_sched_subport *subport,
2278         uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2279 {
2280         struct rte_sched_grinder *grinder = subport->grinder + pos;
2281         uint16_t w[4];
2282
2283         grinder->pcache_w = 0;
2284         grinder->pcache_r = 0;
2285
2286         w[0] = (uint16_t) bmp_slab;
2287         w[1] = (uint16_t) (bmp_slab >> 16);
2288         w[2] = (uint16_t) (bmp_slab >> 32);
2289         w[3] = (uint16_t) (bmp_slab >> 48);
2290
2291         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2292         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2293         grinder->pcache_w += (w[0] != 0);
2294
2295         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2296         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2297         grinder->pcache_w += (w[1] != 0);
2298
2299         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2300         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2301         grinder->pcache_w += (w[2] != 0);
2302
2303         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2304         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2305         grinder->pcache_w += (w[3] != 0);
2306 }
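/*
 * Each 64-bit bitmap slab covers four consecutive pipes (16 queue bits
 * per pipe), so the slab is split into four 16-bit words above and
 * only the non-empty ones are cached. Worked example: bmp_slab =
 * 0x0001000000030000 at bmp_pos = 128 caches two entries, qmask 0x0003
 * at queue index 144 (pipe 9) and qmask 0x0001 at queue index 176
 * (pipe 11).
 */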
2307
2308 static inline void
2309 grinder_tccache_populate(struct rte_sched_subport *subport,
2310         uint32_t pos, uint32_t qindex, uint16_t qmask)
2311 {
2312         struct rte_sched_grinder *grinder = subport->grinder + pos;
2313         uint8_t b, i;
2314
2315         grinder->tccache_w = 0;
2316         grinder->tccache_r = 0;
2317
2318         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2319                 b = (uint8_t) ((qmask >> i) & 0x1);
2320                 grinder->tccache_qmask[grinder->tccache_w] = b;
2321                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2322                 grinder->tccache_w += (b != 0);
2323         }
2324
2325         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2326         grinder->tccache_qmask[grinder->tccache_w] = b;
2327         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2328                 RTE_SCHED_TRAFFIC_CLASS_BE;
2329         grinder->tccache_w += (b != 0);
2330 }
2331
2332 static inline int
2333 grinder_next_tc(struct rte_sched_port *port,
2334         struct rte_sched_subport *subport, uint32_t pos)
2335 {
2336         struct rte_sched_grinder *grinder = subport->grinder + pos;
2337         struct rte_mbuf **qbase;
2338         uint32_t qindex;
2339         uint16_t qsize;
2340
2341         if (grinder->tccache_r == grinder->tccache_w)
2342                 return 0;
2343
2344         qindex = grinder->tccache_qindex[grinder->tccache_r];
2345         qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2346         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2347
2348         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2349         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2350         grinder->qsize = qsize;
2351
2352         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2353                 grinder->queue[0] = subport->queue + qindex;
2354                 grinder->qbase[0] = qbase;
2355                 grinder->qindex[0] = qindex;
2356                 grinder->tccache_r++;
2357
2358                 return 1;
2359         }
2360
2361         grinder->queue[0] = subport->queue + qindex;
2362         grinder->queue[1] = subport->queue + qindex + 1;
2363         grinder->queue[2] = subport->queue + qindex + 2;
2364         grinder->queue[3] = subport->queue + qindex + 3;
2365
2366         grinder->qbase[0] = qbase;
2367         grinder->qbase[1] = qbase + qsize;
2368         grinder->qbase[2] = qbase + 2 * qsize;
2369         grinder->qbase[3] = qbase + 3 * qsize;
2370
2371         grinder->qindex[0] = qindex;
2372         grinder->qindex[1] = qindex + 1;
2373         grinder->qindex[2] = qindex + 2;
2374         grinder->qindex[3] = qindex + 3;
2375
2376         grinder->tccache_r++;
2377         return 1;
2378 }
2379
2380 static inline int
2381 grinder_next_pipe(struct rte_sched_port *port,
2382         struct rte_sched_subport *subport, uint32_t pos)
2383 {
2384         struct rte_sched_grinder *grinder = subport->grinder + pos;
2385         uint32_t pipe_qindex;
2386         uint16_t pipe_qmask;
2387
2388         if (grinder->pcache_r < grinder->pcache_w) {
2389                 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2390                 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2391                 grinder->pcache_r++;
2392         } else {
2393                 uint64_t bmp_slab = 0;
2394                 uint32_t bmp_pos = 0;
2395
2396                 /* Get another non-empty pipe group */
2397                 if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2398                         return 0;
2399
2400 #ifdef RTE_SCHED_DEBUG
2401                 debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2402 #endif
2403
2404                 /* Return if pipe group already in one of the other grinders */
2405                 subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2406                 if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2407                         return 0;
2408
2409                 subport->grinder_base_bmp_pos[pos] = bmp_pos;
2410
2411                 /* Install new pipe group into grinder's pipe cache */
2412                 grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2413
2414                 pipe_qmask = grinder->pcache_qmask[0];
2415                 pipe_qindex = grinder->pcache_qindex[0];
2416                 grinder->pcache_r = 1;
2417         }
2418
2419         /* Install new pipe in the grinder */
2420         grinder->pindex = pipe_qindex >> 4;
2421         grinder->subport = subport;
2422         grinder->pipe = subport->pipe + grinder->pindex;
2423         grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2424         grinder->productive = 0;
2425
2426         grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2427         grinder_next_tc(port, subport, pos);
2428
2429         /* Check for pipe exhaustion */
2430         if (grinder->pindex == subport->pipe_loop) {
2431                 subport->pipe_exhaustion = 1;
2432                 subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2433         }
2434
2435         return 1;
2436 }
2437
2438
2439 static inline void
2440 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2441 {
2442         struct rte_sched_grinder *grinder = subport->grinder + pos;
2443         struct rte_sched_pipe *pipe = grinder->pipe;
2444         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2445         uint32_t qmask = grinder->qmask;
2446
2447         grinder->wrr_tokens[0] =
2448                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2449         grinder->wrr_tokens[1] =
2450                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2451         grinder->wrr_tokens[2] =
2452                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2453         grinder->wrr_tokens[3] =
2454                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2455
2456         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2457         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2458         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2459         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2460
2461         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2462         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2463         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2464         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2465 }
2466
2467 static inline void
2468 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2469 {
2470         struct rte_sched_grinder *grinder = subport->grinder + pos;
2471         struct rte_sched_pipe *pipe = grinder->pipe;
2472
2473         pipe->wrr_tokens[0] =
2474                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2475                                 RTE_SCHED_WRR_SHIFT;
2476         pipe->wrr_tokens[1] =
2477                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2478                                 RTE_SCHED_WRR_SHIFT;
2479         pipe->wrr_tokens[2] =
2480                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2481                                 RTE_SCHED_WRR_SHIFT;
2482         pipe->wrr_tokens[3] =
2483                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2484                                 RTE_SCHED_WRR_SHIFT;
2485 }
2486
2487 static inline void
2488 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2489 {
2490         struct rte_sched_grinder *grinder = subport->grinder + pos;
2491         uint16_t wrr_tokens_min;
2492
2493         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2494         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2495         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2496         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2497
2498         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2499         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2500
2501         grinder->wrr_tokens[0] -= wrr_tokens_min;
2502         grinder->wrr_tokens[1] -= wrr_tokens_min;
2503         grinder->wrr_tokens[2] -= wrr_tokens_min;
2504         grinder->wrr_tokens[3] -= wrr_tokens_min;
2505 }
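/*
 * This is one WRR selection step: inactive queues are saturated to
 * 0xFFFF through their mask so they can never win, the queue with the
 * fewest accumulated tokens is picked by rte_min_pos_4_u16(), and the
 * winner is normalized to zero by subtracting the minimum from all
 * four lanes. Worked example: tokens {300, 100, 700, 0xFFFF} select
 * qpos = 1 and leave {200, 0, 600, 0xFF9B}; the inactive lane is
 * re-saturated by the OR step on the next pass.
 */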
2506
2507
2508 #define grinder_evict(subport, pos)
2509
2510 static inline void
2511 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2512 {
2513         struct rte_sched_grinder *grinder = subport->grinder + pos;
2514
2515         rte_prefetch0(grinder->pipe);
2516         rte_prefetch0(grinder->queue[0]);
2517 }
2518
2519 static inline void
2520 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2521 {
2522         struct rte_sched_grinder *grinder = subport->grinder + pos;
2523         uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2524
2525         qsize = grinder->qsize;
2526         grinder->qpos = 0;
2527
2528         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2529                 qr[0] = grinder->queue[0]->qr & (qsize - 1);
2530
2531                 rte_prefetch0(grinder->qbase[0] + qr[0]);
2532                 return;
2533         }
2534
2535         qr[0] = grinder->queue[0]->qr & (qsize - 1);
2536         qr[1] = grinder->queue[1]->qr & (qsize - 1);
2537         qr[2] = grinder->queue[2]->qr & (qsize - 1);
2538         qr[3] = grinder->queue[3]->qr & (qsize - 1);
2539
2540         rte_prefetch0(grinder->qbase[0] + qr[0]);
2541         rte_prefetch0(grinder->qbase[1] + qr[1]);
2542
2543         grinder_wrr_load(subport, pos);
2544         grinder_wrr(subport, pos);
2545
2546         rte_prefetch0(grinder->qbase[2] + qr[2]);
2547         rte_prefetch0(grinder->qbase[3] + qr[3]);
2548 }
2549
2550 static inline void
2551 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2552 {
2553         struct rte_sched_grinder *grinder = subport->grinder + pos;
2554         uint32_t qpos = grinder->qpos;
2555         struct rte_mbuf **qbase = grinder->qbase[qpos];
2556         uint16_t qsize = grinder->qsize;
2557         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2558
2559         grinder->pkt = qbase[qr];
2560         rte_prefetch0(grinder->pkt);
2561
2562         if (unlikely((qr & 0x7) == 7)) {
2563                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2564
2565                 rte_prefetch0(qbase + qr_next);
2566         }
2567 }
2568
2569 static inline uint32_t
2570 grinder_handle(struct rte_sched_port *port,
2571         struct rte_sched_subport *subport, uint32_t pos)
2572 {
2573         struct rte_sched_grinder *grinder = subport->grinder + pos;
2574
2575         switch (grinder->state) {
2576         case e_GRINDER_PREFETCH_PIPE:
2577         {
2578                 if (grinder_next_pipe(port, subport, pos)) {
2579                         grinder_prefetch_pipe(subport, pos);
2580                         subport->busy_grinders++;
2581
2582                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2583                         return 0;
2584                 }
2585
2586                 return 0;
2587         }
2588
2589         case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2590         {
2591                 struct rte_sched_pipe *pipe = grinder->pipe;
2592
2593                 grinder->pipe_params = subport->pipe_profiles + pipe->profile;
2594                 grinder_prefetch_tc_queue_arrays(subport, pos);
2595                 grinder_credits_update(port, subport, pos);
2596
2597                 grinder->state = e_GRINDER_PREFETCH_MBUF;
2598                 return 0;
2599         }
2600
2601         case e_GRINDER_PREFETCH_MBUF:
2602         {
2603                 grinder_prefetch_mbuf(subport, pos);
2604
2605                 grinder->state = e_GRINDER_READ_MBUF;
2606                 return 0;
2607         }
2608
2609         case e_GRINDER_READ_MBUF:
2610         {
2611                 uint32_t wrr_active, result = 0;
2612
2613                 result = grinder_schedule(port, subport, pos);
2614
2615                 wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2616
2617                 /* Look for next packet within the same TC */
2618                 if (result && grinder->qmask) {
2619                         if (wrr_active)
2620                                 grinder_wrr(subport, pos);
2621
2622                         grinder_prefetch_mbuf(subport, pos);
2623
2624                         return 1;
2625                 }
2626
2627                 if (wrr_active)
2628                         grinder_wrr_store(subport, pos);
2629
2630                 /* Look for another active TC within same pipe */
2631                 if (grinder_next_tc(port, subport, pos)) {
2632                         grinder_prefetch_tc_queue_arrays(subport, pos);
2633
2634                         grinder->state = e_GRINDER_PREFETCH_MBUF;
2635                         return result;
2636                 }
2637
2638                 if (grinder->productive == 0 &&
2639                     subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
2640                         subport->pipe_loop = grinder->pindex;
2641
2642                 grinder_evict(subport, pos);
2643
2644                 /* Look for another active pipe */
2645                 if (grinder_next_pipe(port, subport, pos)) {
2646                         grinder_prefetch_pipe(subport, pos);
2647
2648                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2649                         return result;
2650                 }
2651
2652                 /* No active pipe found */
2653                 subport->busy_grinders--;
2654
2655                 grinder->state = e_GRINDER_PREFETCH_PIPE;
2656                 return result;
2657         }
2658
2659         default:
2660                 rte_panic("Algorithmic error (invalid state)\n");
2661                 return 0;
2662         }
2663 }
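/*
 * grinder_handle() advances one grinder through its state machine per
 * call: e_GRINDER_PREFETCH_PIPE picks the next active pipe and
 * prefetches it, e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS resolves the pipe
 * profile, updates credits and prefetches the queue arrays,
 * e_GRINDER_PREFETCH_MBUF prefetches the head packet, and
 * e_GRINDER_READ_MBUF tries to schedule it, then loops within the same
 * TC, moves to the next TC or to a new pipe as queues drain. Only
 * e_GRINDER_READ_MBUF can emit a packet, so a grinder outputs at most
 * one packet per call.
 */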
2664
2665 static inline void
2666 rte_sched_port_time_resync(struct rte_sched_port *port)
2667 {
2668         uint64_t cycles = rte_get_tsc_cycles();
2669         uint64_t cycles_diff = cycles - port->time_cpu_cycles;
2670         uint64_t bytes_diff;
2671         uint32_t i;
2672
2673         /* Compute elapsed time in bytes */
2674         bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2675                                            port->inv_cycles_per_byte);
2676
2677         /* Advance port time */
2678         port->time_cpu_cycles = cycles;
2679         port->time_cpu_bytes += bytes_diff;
2680         if (port->time < port->time_cpu_bytes)
2681                 port->time = port->time_cpu_bytes;
2682
2683         /* Reset pipe loop detection */
2684         for (i = 0; i < port->n_subports_per_port; i++)
2685                 port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2686 }
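/*
 * Worked example for the conversion above (assuming the port config
 * stores cycles_per_byte scaled by 2^RTE_SCHED_TIME_SHIFT): with a
 * 2 GHz TSC and a 10 Gbps (1.25e9 bytes/s) port, cycles_per_byte is
 * 1.6, stored as (2e9 << 8) / 1.25e9 = 409 after integer division
 * (the shift preserves sub-cycle precision). 1000 elapsed cycles then
 * advance the port time by about (1000 << 8) / 409 = 625 bytes, i.e.
 * 500 ns worth of line rate.
 */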
2687
2688 static inline int
2689 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2690 {
2691         int exceptions;
2692
2693         /* Check if any exception flag is set */
2694         exceptions = (second_pass && subport->busy_grinders == 0) ||
2695                 (subport->pipe_exhaustion == 1);
2696
2697         /* Clear exception flags */
2698         subport->pipe_exhaustion = 0;
2699
2700         return exceptions;
2701 }
2702
2703 int
2704 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
2705 {
2706         struct rte_sched_subport *subport;
2707         uint32_t subport_id = port->subport_id;
2708         uint32_t i, n_subports = 0, count;
2709
2710         port->pkts_out = pkts;
2711         port->n_pkts_out = 0;
2712
2713         rte_sched_port_time_resync(port);
2714
2715         /* Advance each grinder by one step, in round-robin order */
2716         for (i = 0, count = 0; ; i++)  {
2717                 subport = port->subports[subport_id];
2718
2719                 count += grinder_handle(port, subport,
2720                                 i & (RTE_SCHED_PORT_N_GRINDERS - 1));
2721
2722                 if (count == n_pkts) {
2723                         subport_id++;
2724
2725                         if (subport_id == port->n_subports_per_port)
2726                                 subport_id = 0;
2727
2728                         port->subport_id = subport_id;
2729                         break;
2730                 }
2731
2732                 if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
2733                         i = 0;
2734                         subport_id++;
2735                         n_subports++;
2736                 }
2737
2738                 if (subport_id == port->n_subports_per_port)
2739                         subport_id = 0;
2740
2741                 if (n_subports == port->n_subports_per_port) {
2742                         port->subport_id = subport_id;
2743                         break;
2744                 }
2745         }
2746
2747         return count;
2748 }
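/*
 * Illustrative run-to-completion loop (a minimal sketch; RX, TX and
 * classification are application-specific and only hinted at here):
 *
 *	struct rte_mbuf *tx_burst[64];
 *	uint32_t n_tx;
 *
 *	for ( ; ; ) {
 *		// ... RX, classify, rte_sched_port_enqueue() ...
 *		n_tx = rte_sched_port_dequeue(port, tx_burst, 64);
 *		// ... transmit the n_tx packets in tx_burst[] ...
 *	}
 */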