sched: DPDK hierarchical QoS scheduler implementation
[dpdk.git] / lib / sched / rte_sched.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <string.h>
7
8 #include <rte_common.h>
9 #include <rte_log.h>
10 #include <rte_malloc.h>
11 #include <rte_cycles.h>
12 #include <rte_prefetch.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mbuf.h>
15 #include <rte_bitmap.h>
16 #include <rte_reciprocal.h>
17
18 #include "rte_sched.h"
19 #include "rte_sched_common.h"
20 #include "rte_approx.h"
21
22 #ifdef __INTEL_COMPILER
23 #pragma warning(disable:2259) /* conversion may lose significant bits */
24 #endif
25
26 #ifndef RTE_SCHED_PORT_N_GRINDERS
27 #define RTE_SCHED_PORT_N_GRINDERS 8
28 #endif
29
30 #define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
31 #define RTE_SCHED_WRR_SHIFT                   3
32 #define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
33 #define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
34 #define RTE_SCHED_PIPE_INVALID                UINT32_MAX
35 #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
36
37 /* Scaling for cycles_per_byte calculation
38  * Chosen so that minimum rate is 480 bit/sec
39  */
40 #define RTE_SCHED_TIME_SHIFT                  8
41
/* Internal (pre-computed) form of a pipe profile. Rates supplied by the
 * user in bytes/sec are converted into token-bucket (credits, period)
 * pairs by rte_sched_pipe_profile_convert().
 */
struct rte_sched_pipe_profile {
	/* Token bucket (TB) */
	uint64_t tb_period;             /* credit refill period */
	uint64_t tb_credits_per_period; /* credits added per tb_period */
	uint64_t tb_size;               /* bucket capacity (burst limit) */

	/* Pipe traffic classes */
	uint64_t tc_period;             /* TC credit update period, in bytes */
	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint8_t tc_ov_weight;           /* best-effort TC oversubscription weight */

	/* Pipe best-effort traffic class queues */
	uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; /* normalized WRR costs */
};
56
/* Run-time scheduling state of one pipe. */
struct rte_sched_pipe {
	/* Token bucket (TB) */
	uint64_t tb_time; /* time of last update */
	uint64_t tb_credits; /* currently available TB credits */

	/* Pipe profile and flags */
	uint32_t profile; /* pipe profile index */

	/* Traffic classes (TCs) */
	uint64_t tc_time; /* time of next update */
	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/* Weighted Round Robin (WRR) */
	uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

	/* TC oversubscription */
	uint64_t tc_ov_credits;
	uint8_t tc_ov_period_id;
} __rte_cache_aligned;
76
/* Queue positions; 16 bits wide, which is why qsize is limited to 32K
 * elements (see rte_sched_subport_check_params()). The mbuf slots
 * themselves live in the subport's queue_array.
 */
struct rte_sched_queue {
	uint16_t qw; /* write (enqueue) position */
	uint16_t qr; /* read (dequeue) position */
};
81
/* Per-queue auxiliary data: statistics plus, when congestion management
 * is compiled in (RTE_SCHED_CMAN), RED or PIE state — only one is active
 * per subport, selected by subport->cman, hence the union.
 */
struct rte_sched_queue_extra {
	struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_CMAN
	RTE_STD_C11
	union {
		struct rte_red red;
		struct rte_pie pie;
	};
#endif
};
92
/* Grinder state machine: prefetch the pipe, then its TC queue arrays,
 * then the head mbuf, then read (schedule) the mbuf.
 */
enum grinder_state {
	e_GRINDER_PREFETCH_PIPE = 0,
	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
	e_GRINDER_PREFETCH_MBUF,
	e_GRINDER_READ_MBUF
};
99
/* Internal (pre-computed) form of a subport profile; token-bucket
 * encoding is the same as in struct rte_sched_pipe_profile.
 */
struct rte_sched_subport_profile {
	/* Token bucket (TB) */
	uint64_t tb_period;             /* credit refill period */
	uint64_t tb_credits_per_period; /* credits added per tb_period */
	uint64_t tb_size;               /* bucket capacity (burst limit) */

	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint64_t tc_period;             /* TC credit update period, in bytes */
};
109
/* Per-grinder context. Each grinder services one pipe at a time,
 * walking its traffic classes and best-effort WRR queues.
 */
struct rte_sched_grinder {
	/* Pipe cache: small ring of prefetched pipe/queue candidates */
	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
	uint32_t pcache_w; /* write position */
	uint32_t pcache_r; /* read position */

	/* Current pipe */
	enum grinder_state state; /* see enum grinder_state */
	uint32_t productive;
	uint32_t pindex; /* current pipe index */
	struct rte_sched_subport *subport;
	struct rte_sched_subport_profile *subport_params;
	struct rte_sched_pipe *pipe;
	struct rte_sched_pipe_profile *pipe_params;

	/* TC cache: traffic classes of the current pipe awaiting service */
	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint32_t tccache_w; /* write position */
	uint32_t tccache_r; /* read position */

	/* Current TC */
	uint32_t tc_index;
	struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
	uint16_t qsize;
	uint32_t qmask;
	uint32_t qpos;
	struct rte_mbuf *pkt; /* packet currently being considered */

	/* WRR state for the best-effort TC queues */
	uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};
147
148 struct rte_sched_subport {
149         /* Token bucket (TB) */
150         uint64_t tb_time; /* time of last update */
151         uint64_t tb_credits;
152
153         /* Traffic classes (TCs) */
154         uint64_t tc_time; /* time of next update */
155         uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
156
157         /* TC oversubscription */
158         uint64_t tc_ov_wm;
159         uint64_t tc_ov_wm_min;
160         uint64_t tc_ov_wm_max;
161         uint8_t tc_ov_period_id;
162         uint8_t tc_ov;
163         uint32_t tc_ov_n;
164         double tc_ov_rate;
165
166         /* Statistics */
167         struct rte_sched_subport_stats stats __rte_cache_aligned;
168
169         /* subport profile */
170         uint32_t profile;
171         /* Subport pipes */
172         uint32_t n_pipes_per_subport_enabled;
173         uint32_t n_pipe_profiles;
174         uint32_t n_max_pipe_profiles;
175
176         /* Pipe best-effort TC rate */
177         uint64_t pipe_tc_be_rate_max;
178
179         /* Pipe queues size */
180         uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
181
182 #ifdef RTE_SCHED_CMAN
183         bool cman_enabled;
184         enum rte_sched_cman_mode cman;
185
186         RTE_STD_C11
187         union {
188                 struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
189                 struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
190         };
191 #endif
192
193         /* Scheduling loop detection */
194         uint32_t pipe_loop;
195         uint32_t pipe_exhaustion;
196
197         /* Bitmap */
198         struct rte_bitmap *bmp;
199         uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
200
201         /* Grinders */
202         struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
203         uint32_t busy_grinders;
204
205         /* Queue base calculation */
206         uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
207         uint32_t qsize_sum;
208
209         struct rte_sched_pipe *pipe;
210         struct rte_sched_queue *queue;
211         struct rte_sched_queue_extra *queue_extra;
212         struct rte_sched_pipe_profile *pipe_profiles;
213         uint8_t *bmp_array;
214         struct rte_mbuf **queue_array;
215         uint8_t memory[0] __rte_cache_aligned;
216
217         /* TC oversubscription activation */
218         int tc_ov_enabled;
219 } __rte_cache_aligned;
220
/* Port scheduler instance: user configuration, timing state, grinder
 * output staging, and the per-subport contexts (trailing array).
 */
struct rte_sched_port {
	/* User parameters */
	uint32_t n_subports_per_port;
	uint32_t n_pipes_per_subport;
	uint32_t n_pipes_per_subport_log2;
	uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /* TC -> pipe queue */
	uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];  /* pipe queue -> TC */
	uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE]; /* pipe queue -> position in TC */
	uint32_t n_subport_profiles;
	uint32_t n_max_subport_profiles;
	uint64_t rate;           /* port rate; used as the base for credit conversion */
	uint32_t mtu;
	uint32_t frame_overhead;
	int socket;              /* socket id; must be >= 0 (see check_params) */

	/* Timing */
	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
	uint64_t time;                /* Current NIC TX time measured in bytes */
	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
	uint64_t cycles_per_byte;

	/* Grinders */
	struct rte_mbuf **pkts_out;   /* staging array for dequeued packets */
	uint32_t n_pkts_out;
	uint32_t subport_id;          /* subport currently being serviced */

	/* Large data structures */
	struct rte_sched_subport_profile *subport_profiles;
	struct rte_sched_subport *subports[0] __rte_cache_aligned; /* trailing array */
} __rte_cache_aligned;
252
/* Identifiers for the arrays carved out of subport->memory[]; passed to
 * rte_sched_subport_get_array_base() to compute each array's offset.
 * Order here defines the layout order in memory.
 */
enum rte_sched_subport_array {
	e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
	e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
	e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
	e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};
262
263 static inline uint32_t
264 rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
265 {
266         return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
267 }
268
/* Return the base of the mbuf slot array for queue qindex within the
 * subport's flat queue_array: pipes are laid out back to back
 * (qsize_sum bytes of slots each), with per-queue offsets in qsize_add.
 */
static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
	/* NOTE(review): the shift hard-codes RTE_SCHED_QUEUES_PER_PIPE == 16
	 * (qindex >> 4 == qindex / RTE_SCHED_QUEUES_PER_PIPE) — confirm.
	 */
	uint32_t pindex = qindex >> 4;
	uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

	return (subport->queue_array + pindex *
		subport->qsize_sum + subport->qsize_add[qpos]);
}
278
279 static inline uint16_t
280 rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
281 struct rte_sched_subport *subport, uint32_t qindex)
282 {
283         uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
284
285         return subport->qsize[tc];
286 }
287
288 static inline uint32_t
289 rte_sched_port_queues_per_port(struct rte_sched_port *port)
290 {
291         uint32_t n_queues = 0, i;
292
293         for (i = 0; i < port->n_subports_per_port; i++)
294                 n_queues += rte_sched_subport_pipe_queues(port->subports[i]);
295
296         return n_queues;
297 }
298
299 static inline uint16_t
300 rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
301 {
302         uint16_t pipe_queue = port->pipe_queue[traffic_class];
303
304         return pipe_queue;
305 }
306
307 static inline uint8_t
308 rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
309 {
310         uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
311
312         return pipe_tc;
313 }
314
315 static inline uint8_t
316 rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
317 {
318         uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
319
320         return tc_queue;
321 }
322
/* Validate one pipe profile against the port rate and the per-TC queue
 * sizes. Returns 0 when valid, or -EINVAL (after logging) on the first
 * check that fails.
 */
static int
pipe_profile_check(struct rte_sched_pipe_params *params,
	uint64_t rate, uint16_t *qsize)
{
	uint32_t i;

	/* Pipe parameters */
	if (params == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	/* TB rate: non-zero, not greater than port rate */
	if (params->tb_rate == 0 ||
		params->tb_rate > rate) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tb rate\n", __func__);
		return -EINVAL;
	}

	/* TB size: non-zero */
	if (params->tb_size == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tb size\n", __func__);
		return -EINVAL;
	}

	/* TC rate: non-zero if qsize non-zero, less than pipe rate.
	 * A TC with no queue storage must have no rate, and vice versa.
	 */
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
			params->tc_rate[i] > params->tb_rate))) {
			RTE_LOG(ERR, SCHED,
				"%s: Incorrect value for qsize or tc_rate\n", __func__);
			return -EINVAL;
		}
	}

	/* Best-effort TC: both its rate and its queue size are mandatory */
	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
		qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for be traffic class rate\n", __func__);
		return -EINVAL;
	}

	/* TC period: non-zero */
	if (params->tc_period == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tc period\n", __func__);
		return -EINVAL;
	}

	/*  Best effort tc oversubscription weight: non-zero */
	if (params->tc_ov_weight == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tc ov weight\n", __func__);
		return -EINVAL;
	}

	/* Queue WRR weights: non-zero */
	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
		if (params->wrr_weights[i] == 0) {
			RTE_LOG(ERR, SCHED,
				"%s: Incorrect value for wrr weight\n", __func__);
			return -EINVAL;
		}
	}

	return 0;
}
394
395 static int
396 subport_profile_check(struct rte_sched_subport_profile_params *params,
397         uint64_t rate)
398 {
399         uint32_t i;
400
401         /* Check user parameters */
402         if (params == NULL) {
403                 RTE_LOG(ERR, SCHED, "%s: "
404                 "Incorrect value for parameter params\n", __func__);
405                 return -EINVAL;
406         }
407
408         if (params->tb_rate == 0 || params->tb_rate > rate) {
409                 RTE_LOG(ERR, SCHED, "%s: "
410                 "Incorrect value for tb rate\n", __func__);
411                 return -EINVAL;
412         }
413
414         if (params->tb_size == 0) {
415                 RTE_LOG(ERR, SCHED, "%s: "
416                 "Incorrect value for tb size\n", __func__);
417                 return -EINVAL;
418         }
419
420         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
421                 uint64_t tc_rate = params->tc_rate[i];
422
423                 if (tc_rate == 0 || (tc_rate > params->tb_rate)) {
424                         RTE_LOG(ERR, SCHED, "%s: "
425                         "Incorrect value for tc rate\n", __func__);
426                         return -EINVAL;
427                 }
428         }
429
430         if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
431                 RTE_LOG(ERR, SCHED, "%s: "
432                 "Incorrect tc rate(best effort)\n", __func__);
433                 return -EINVAL;
434         }
435
436         if (params->tc_period == 0) {
437                 RTE_LOG(ERR, SCHED, "%s: "
438                 "Incorrect value for tc period\n", __func__);
439                 return -EINVAL;
440         }
441
442         return 0;
443 }
444
/* Validate the port-level configuration (socket, rate, mtu, subport
 * count, subport profiles, pipe count). Returns 0 when valid, or
 * -EINVAL (after logging) on the first check that fails.
 */
static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
	uint32_t i;

	if (params == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	/* socket */
	if (params->socket < 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for socket id\n", __func__);
		return -EINVAL;
	}

	/* rate */
	if (params->rate == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for rate\n", __func__);
		return -EINVAL;
	}

	/* mtu */
	if (params->mtu == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for mtu\n", __func__);
		return -EINVAL;
	}

	/* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
	if (params->n_subports_per_port == 0 ||
	    params->n_subports_per_port > 1u << 16 ||
	    !rte_is_power_of_2(params->n_subports_per_port)) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for number of subports\n", __func__);
		return -EINVAL;
	}

	/* Subport profile table: present, non-empty, within its max size */
	if (params->subport_profiles == NULL ||
		params->n_subport_profiles == 0 ||
		params->n_max_subport_profiles == 0 ||
		params->n_subport_profiles > params->n_max_subport_profiles) {
		RTE_LOG(ERR, SCHED,
		"%s: Incorrect value for subport profiles\n", __func__);
		return -EINVAL;
	}

	/* Validate each subport profile individually */
	for (i = 0; i < params->n_subport_profiles; i++) {
		struct rte_sched_subport_profile_params *p =
						params->subport_profiles + i;
		int status;

		status = subport_profile_check(p, params->rate);
		if (status != 0) {
			RTE_LOG(ERR, SCHED,
			"%s: subport profile check failed(%d)\n",
			__func__, status);
			return -EINVAL;
		}
	}

	/* n_pipes_per_subport: non-zero, power of 2 */
	if (params->n_pipes_per_subport == 0 ||
	    !rte_is_power_of_2(params->n_pipes_per_subport)) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for maximum pipes number\n", __func__);
		return -EINVAL;
	}

	return 0;
}
519
/* Return the byte offset of the requested array within the subport's
 * trailing memory[] region. Arrays are laid out in the order of
 * enum rte_sched_subport_array, each section rounded up to a cache
 * line. Passing e_RTE_SCHED_SUBPORT_ARRAY_TOTAL yields the total size.
 */
static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
	enum rte_sched_subport_array array)
{
	uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
	uint32_t n_subport_pipe_queues =
		RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

	uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
	uint32_t size_queue =
		n_subport_pipe_queues * sizeof(struct rte_sched_queue);
	uint32_t size_queue_extra
		= n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
	uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
		sizeof(struct rte_sched_pipe_profile);
	uint32_t size_bmp_array =
		rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
	uint32_t size_per_pipe_queue_array, size_queue_array;

	uint32_t base, i;

	/* Mbuf slot storage per pipe: one queue per strict-priority TC,
	 * RTE_SCHED_MAX_QUEUES_PER_TC queues for the best-effort TC.
	 */
	size_per_pipe_queue_array = 0;
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
			size_per_pipe_queue_array +=
				params->qsize[i] * sizeof(struct rte_mbuf *);
		else
			size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
				params->qsize[i] * sizeof(struct rte_mbuf *);
	}
	size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

	/* Accumulate cache-line-rounded sections until the requested one */
	base = 0;

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

	return base;
}
580
581 static void
582 rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
583 {
584         uint32_t i;
585
586         subport->qsize_add[0] = 0;
587
588         /* Strict priority traffic class */
589         for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
590                 subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];
591
592         /* Best-effort traffic class */
593         subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
594                 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
595                 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
596         subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
597                 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
598                 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
599         subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
600                 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
601                 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
602
603         subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
604                 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
605 }
606
/* Dump the computed pipe profile at DEBUG level.
 * NOTE(review): the format hard-codes 13 TC credit fields (indices
 * 0..12) and 4 WRR costs — assumes RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE
 * == 13 and RTE_SCHED_BE_QUEUES_PER_PIPE == 4; confirm if those change.
 */
static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
	struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
		"	Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
		"	Traffic classes: period = %"PRIu64",\n"
		"	credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
		", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
		", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
		"	Best-effort traffic class oversubscription: weight = %hhu\n"
		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
		i,

		/* Token bucket */
		p->tb_period,
		p->tb_credits_per_period,
		p->tb_size,

		/* Traffic classes */
		p->tc_period,
		p->tc_credits_per_period[0],
		p->tc_credits_per_period[1],
		p->tc_credits_per_period[2],
		p->tc_credits_per_period[3],
		p->tc_credits_per_period[4],
		p->tc_credits_per_period[5],
		p->tc_credits_per_period[6],
		p->tc_credits_per_period[7],
		p->tc_credits_per_period[8],
		p->tc_credits_per_period[9],
		p->tc_credits_per_period[10],
		p->tc_credits_per_period[11],
		p->tc_credits_per_period[12],

		/* Best-effort traffic class oversubscription */
		p->tc_ov_weight,

		/* WRR */
		p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}
649
/* Dump the computed subport profile at DEBUG level.
 * NOTE(review): the format hard-codes 13 TC credit fields (indices
 * 0..12) — assumes RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE == 13; confirm
 * if that constant changes.
 */
static void
rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i)
{
	struct rte_sched_subport_profile *p = port->subport_profiles + i;

	RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n"
	"Token bucket: period = %"PRIu64", credits per period = %"PRIu64","
	"size = %"PRIu64"\n"
	"Traffic classes: period = %"PRIu64",\n"
	"credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
	" %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
	" %"PRIu64", %"PRIu64", %"PRIu64"]\n",
	i,

	/* Token bucket */
	p->tb_period,
	p->tb_credits_per_period,
	p->tb_size,

	/* Traffic classes */
	p->tc_period,
	p->tc_credits_per_period[0],
	p->tc_credits_per_period[1],
	p->tc_credits_per_period[2],
	p->tc_credits_per_period[3],
	p->tc_credits_per_period[4],
	p->tc_credits_per_period[5],
	p->tc_credits_per_period[6],
	p->tc_credits_per_period[7],
	p->tc_credits_per_period[8],
	p->tc_credits_per_period[9],
	p->tc_credits_per_period[10],
	p->tc_credits_per_period[11],
	p->tc_credits_per_period[12]);
}
685
/* Convert a duration in milliseconds into a byte count at the given
 * rate (bytes/sec): bytes = time_ms * rate / 1000 (integer division).
 */
static inline uint64_t
rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
{
	return (time_ms * rate) / 1000;
}
695
/* Convert a user pipe profile (rates in bytes/sec, periods in ms) into
 * the internal pre-computed form used by the scheduler fast path.
 */
static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
	struct rte_sched_pipe_params *src,
	struct rte_sched_pipe_profile *dst,
	uint64_t rate)
{
	uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint32_t lcd1, lcd2, lcd;
	uint32_t i;

	/* Token Bucket: approximate tb_rate/rate as a rational number
	 * (credits_per_period / period) within RTE_SCHED_TB_RATE_CONFIG_ERR;
	 * an exact rate match needs no approximation.
	 */
	if (src->tb_rate == rate) {
		dst->tb_credits_per_period = 1;
		dst->tb_period = 1;
	} else {
		double tb_rate = (double) src->tb_rate
				/ (double) rate;
		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
			&dst->tb_period);
	}

	dst->tb_size = src->tb_size;

	/* Traffic Classes: periods and per-period credits in bytes.
	 * Credits are only computed for TCs with a non-zero queue size.
	 */
	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
						rate);

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		if (subport->qsize[i])
			dst->tc_credits_per_period[i]
				= rte_sched_time_ms_to_bytes(src->tc_period,
					src->tc_rate[i]);

	dst->tc_ov_weight = src->tc_ov_weight;

	/* WRR queues: normalize the weights into integer costs via their
	 * lowest common denominator, so cost[i] = lcd / weight[i].
	 * NOTE(review): the explicit indices 0..3 assume
	 * RTE_SCHED_BE_QUEUES_PER_PIPE == 4 — confirm.
	 */
	wrr_cost[0] = src->wrr_weights[0];
	wrr_cost[1] = src->wrr_weights[1];
	wrr_cost[2] = src->wrr_weights[2];
	wrr_cost[3] = src->wrr_weights[3];

	lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
	lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
	lcd = rte_get_lcd(lcd1, lcd2);

	wrr_cost[0] = lcd / wrr_cost[0];
	wrr_cost[1] = lcd / wrr_cost[1];
	wrr_cost[2] = lcd / wrr_cost[2];
	wrr_cost[3] = lcd / wrr_cost[3];

	dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
	dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
	dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
	dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}
753
754 static void
755 rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src,
756         struct rte_sched_subport_profile *dst,
757         uint64_t rate)
758 {
759         uint32_t i;
760
761         /* Token Bucket */
762         if (src->tb_rate == rate) {
763                 dst->tb_credits_per_period = 1;
764                 dst->tb_period = 1;
765         } else {
766                 double tb_rate = (double) src->tb_rate
767                                 / (double) rate;
768                 double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
769
770                 rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
771                         &dst->tb_period);
772         }
773
774         dst->tb_size = src->tb_size;
775
776         /* Traffic Classes */
777         dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);
778
779         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
780                 dst->tc_credits_per_period[i]
781                         = rte_sched_time_ms_to_bytes(src->tc_period,
782                                 src->tc_rate[i]);
783 }
784
785 static void
786 rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
787         struct rte_sched_subport_params *params, uint64_t rate)
788 {
789         uint32_t i;
790
791         for (i = 0; i < subport->n_pipe_profiles; i++) {
792                 struct rte_sched_pipe_params *src = params->pipe_profiles + i;
793                 struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;
794
795                 rte_sched_pipe_profile_convert(subport, src, dst, rate);
796                 rte_sched_port_log_pipe_profile(subport, i);
797         }
798
799         subport->pipe_tc_be_rate_max = 0;
800         for (i = 0; i < subport->n_pipe_profiles; i++) {
801                 struct rte_sched_pipe_params *src = params->pipe_profiles + i;
802                 uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
803
804                 if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
805                         subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
806         }
807 }
808
809 static void
810 rte_sched_port_config_subport_profile_table(struct rte_sched_port *port,
811         struct rte_sched_port_params *params,
812         uint64_t rate)
813 {
814         uint32_t i;
815
816         for (i = 0; i < port->n_subport_profiles; i++) {
817                 struct rte_sched_subport_profile_params *src
818                                 = params->subport_profiles + i;
819                 struct rte_sched_subport_profile *dst
820                                 = port->subport_profiles + i;
821
822                 rte_sched_subport_profile_convert(src, dst, rate);
823                 rte_sched_port_log_subport_profile(port, i);
824         }
825 }
826
827 static int
828 rte_sched_subport_check_params(struct rte_sched_subport_params *params,
829         uint32_t n_max_pipes_per_subport,
830         uint64_t rate)
831 {
832         uint32_t i;
833
834         /* Check user parameters */
835         if (params == NULL) {
836                 RTE_LOG(ERR, SCHED,
837                         "%s: Incorrect value for parameter params\n", __func__);
838                 return -EINVAL;
839         }
840
841         /* qsize: if non-zero, power of 2,
842          * no bigger than 32K (due to 16-bit read/write pointers)
843          */
844         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
845                 uint16_t qsize = params->qsize[i];
846
847                 if (qsize != 0 && !rte_is_power_of_2(qsize)) {
848                         RTE_LOG(ERR, SCHED,
849                                 "%s: Incorrect value for qsize\n", __func__);
850                         return -EINVAL;
851                 }
852         }
853
854         if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
855                 RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__);
856                 return -EINVAL;
857         }
858
859         /* n_pipes_per_subport: non-zero, power of 2 */
860         if (params->n_pipes_per_subport_enabled == 0 ||
861                 params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
862             !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
863                 RTE_LOG(ERR, SCHED,
864                         "%s: Incorrect value for pipes number\n", __func__);
865                 return -EINVAL;
866         }
867
868         /* pipe_profiles and n_pipe_profiles */
869         if (params->pipe_profiles == NULL ||
870             params->n_pipe_profiles == 0 ||
871                 params->n_max_pipe_profiles == 0 ||
872                 params->n_pipe_profiles > params->n_max_pipe_profiles) {
873                 RTE_LOG(ERR, SCHED,
874                         "%s: Incorrect value for pipe profiles\n", __func__);
875                 return -EINVAL;
876         }
877
878         for (i = 0; i < params->n_pipe_profiles; i++) {
879                 struct rte_sched_pipe_params *p = params->pipe_profiles + i;
880                 int status;
881
882                 status = pipe_profile_check(p, rate, &params->qsize[0]);
883                 if (status != 0) {
884                         RTE_LOG(ERR, SCHED,
885                                 "%s: Pipe profile check failed(%d)\n", __func__, status);
886                         return -EINVAL;
887                 }
888         }
889
890         return 0;
891 }
892
893 uint32_t
894 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
895         struct rte_sched_subport_params **subport_params)
896 {
897         uint32_t size0 = 0, size1 = 0, i;
898         int status;
899
900         status = rte_sched_port_check_params(port_params);
901         if (status != 0) {
902                 RTE_LOG(ERR, SCHED,
903                         "%s: Port scheduler port params check failed (%d)\n",
904                         __func__, status);
905
906                 return 0;
907         }
908
909         for (i = 0; i < port_params->n_subports_per_port; i++) {
910                 struct rte_sched_subport_params *sp = subport_params[i];
911
912                 status = rte_sched_subport_check_params(sp,
913                                 port_params->n_pipes_per_subport,
914                                 port_params->rate);
915                 if (status != 0) {
916                         RTE_LOG(ERR, SCHED,
917                                 "%s: Port scheduler subport params check failed (%d)\n",
918                                 __func__, status);
919
920                         return 0;
921                 }
922         }
923
924         size0 = sizeof(struct rte_sched_port);
925
926         for (i = 0; i < port_params->n_subports_per_port; i++) {
927                 struct rte_sched_subport_params *sp = subport_params[i];
928
929                 size1 += rte_sched_subport_get_array_base(sp,
930                                         e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
931         }
932
933         return size0 + size1;
934 }
935
/*
 * Create and initialize a port scheduler object on the requested socket.
 *
 * Allocates the port header together with the per-subport pointer table,
 * plus a separate subport profile table, then precomputes the queue/TC
 * lookup tables and the fixed-point timing constants.
 *
 * Returns the new port on success, NULL on invalid parameters or
 * allocation failure (errors are logged).
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
	struct rte_sched_port *port = NULL;
	uint32_t size0, size1, size2;
	uint32_t cycles_per_byte;
	uint32_t i, j;
	int status;

	status = rte_sched_port_check_params(params);
	if (status != 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Port scheduler params check failed (%d)\n",
			__func__, status);
		return NULL;
	}

	/* size0: port header; size1: subport pointer table placed right
	 * after it; size2: subport profile table (separate allocation).
	 */
	size0 = sizeof(struct rte_sched_port);
	size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);
	size2 = params->n_max_subport_profiles *
		sizeof(struct rte_sched_subport_profile);

	/* Allocate memory to store the data structures */
	port = rte_zmalloc_socket("qos_params", size0 + size1,
				 RTE_CACHE_LINE_SIZE, params->socket);
	if (port == NULL) {
		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

		return NULL;
	}

	/* Allocate memory to store the subport profile */
	port->subport_profiles  = rte_zmalloc_socket("subport_profile", size2,
					RTE_CACHE_LINE_SIZE, params->socket);
	if (port->subport_profiles == NULL) {
		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
		rte_free(port);
		return NULL;
	}

	/* User parameters */
	port->n_subports_per_port = params->n_subports_per_port;
	port->n_subport_profiles = params->n_subport_profiles;
	port->n_max_subport_profiles = params->n_max_subport_profiles;
	port->n_pipes_per_subport = params->n_pipes_per_subport;
	/* n_pipes_per_subport is validated as a power of 2, so ctz = log2 */
	port->n_pipes_per_subport_log2 =
			__builtin_ctz(params->n_pipes_per_subport);
	port->socket = params->socket;

	/* Traffic class -> index of its first queue within the pipe */
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		port->pipe_queue[i] = i;

	/* Queue index -> traffic class: one queue per strict-priority TC,
	 * then every remaining queue maps to the best-effort (BE) class.
	 */
	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
		port->pipe_tc[i] = j;

		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
			j++;
	}

	/* Queue index -> position within its TC: 0 for the strict-priority
	 * TCs, 0..n for the BE queues.
	 */
	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
		port->tc_queue[i] = j;

		if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
			j++;
	}
	port->rate = params->rate;
	/* MTU includes framing overhead for credit computations */
	port->mtu = params->mtu + params->frame_overhead;
	port->frame_overhead = params->frame_overhead;

	/* Timing */
	port->time_cpu_cycles = rte_get_tsc_cycles();
	port->time_cpu_bytes = 0;
	port->time = 0;

	/* Subport profile table */
	rte_sched_port_config_subport_profile_table(port, params, port->rate);

	/* Fixed-point TSC-cycles-per-byte, scaled by 2^RTE_SCHED_TIME_SHIFT;
	 * the reciprocal avoids a division on the dequeue fast path.
	 * NOTE(review): the result is truncated to 32 bits — assumes
	 * (tsc_hz << 8) / rate fits in uint32_t; confirm for very low rates.
	 */
	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
		/ params->rate;
	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
	port->cycles_per_byte = cycles_per_byte;

	/* Grinders */
	port->pkts_out = NULL;
	port->n_pkts_out = 0;
	port->subport_id = 0;

	return port;
}
1025
1026 static inline void
1027 rte_sched_subport_free(struct rte_sched_port *port,
1028         struct rte_sched_subport *subport)
1029 {
1030         uint32_t n_subport_pipe_queues;
1031         uint32_t qindex;
1032
1033         if (subport == NULL)
1034                 return;
1035
1036         n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);
1037
1038         /* Free enqueued mbufs */
1039         for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
1040                 struct rte_mbuf **mbufs =
1041                         rte_sched_subport_pipe_qbase(subport, qindex);
1042                 uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1043                 if (qsize != 0) {
1044                         struct rte_sched_queue *queue = subport->queue + qindex;
1045                         uint16_t qr = queue->qr & (qsize - 1);
1046                         uint16_t qw = queue->qw & (qsize - 1);
1047
1048                         for (; qr != qw; qr = (qr + 1) & (qsize - 1))
1049                                 rte_pktmbuf_free(mbufs[qr]);
1050                 }
1051         }
1052
1053         rte_free(subport);
1054 }
1055
1056 void
1057 rte_sched_port_free(struct rte_sched_port *port)
1058 {
1059         uint32_t i;
1060
1061         /* Check user parameters */
1062         if (port == NULL)
1063                 return;
1064
1065         for (i = 0; i < port->n_subports_per_port; i++)
1066                 rte_sched_subport_free(port, port->subports[i]);
1067
1068         rte_free(port->subport_profiles);
1069         rte_free(port);
1070 }
1071
1072 static void
1073 rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
1074 {
1075         uint32_t i;
1076
1077         for (i = 0; i < n_subports; i++) {
1078                 struct rte_sched_subport *subport = port->subports[i];
1079
1080                 rte_sched_subport_free(port, subport);
1081         }
1082
1083         rte_free(port->subport_profiles);
1084         rte_free(port);
1085 }
1086
1087 #ifdef RTE_SCHED_CMAN
1088 static int
1089 rte_sched_red_config(struct rte_sched_port *port,
1090         struct rte_sched_subport *s,
1091         struct rte_sched_subport_params *params,
1092         uint32_t n_subports)
1093 {
1094         uint32_t i;
1095
1096         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
1097
1098                 uint32_t j;
1099
1100                 for (j = 0; j < RTE_COLORS; j++) {
1101                         /* if min/max are both zero, then RED is disabled */
1102                         if ((params->cman_params->red_params[i][j].min_th |
1103                                  params->cman_params->red_params[i][j].max_th) == 0) {
1104                                 continue;
1105                         }
1106
1107                         if (rte_red_config_init(&s->red_config[i][j],
1108                                 params->cman_params->red_params[i][j].wq_log2,
1109                                 params->cman_params->red_params[i][j].min_th,
1110                                 params->cman_params->red_params[i][j].max_th,
1111                                 params->cman_params->red_params[i][j].maxp_inv) != 0) {
1112                                 rte_sched_free_memory(port, n_subports);
1113
1114                                 RTE_LOG(NOTICE, SCHED,
1115                                 "%s: RED configuration init fails\n", __func__);
1116                                 return -EINVAL;
1117                         }
1118                 }
1119         }
1120         s->cman = RTE_SCHED_CMAN_RED;
1121         return 0;
1122 }
1123
1124 static int
1125 rte_sched_pie_config(struct rte_sched_port *port,
1126         struct rte_sched_subport *s,
1127         struct rte_sched_subport_params *params,
1128         uint32_t n_subports)
1129 {
1130         uint32_t i;
1131
1132         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
1133                 if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
1134                         RTE_LOG(NOTICE, SCHED,
1135                         "%s: PIE tailq threshold incorrect\n", __func__);
1136                         return -EINVAL;
1137                 }
1138
1139                 if (rte_pie_config_init(&s->pie_config[i],
1140                         params->cman_params->pie_params[i].qdelay_ref,
1141                         params->cman_params->pie_params[i].dp_update_interval,
1142                         params->cman_params->pie_params[i].max_burst,
1143                         params->cman_params->pie_params[i].tailq_th) != 0) {
1144                         rte_sched_free_memory(port, n_subports);
1145
1146                         RTE_LOG(NOTICE, SCHED,
1147                         "%s: PIE configuration init fails\n", __func__);
1148                         return -EINVAL;
1149                         }
1150         }
1151         s->cman = RTE_SCHED_CMAN_PIE;
1152         return 0;
1153 }
1154
1155 static int
1156 rte_sched_cman_config(struct rte_sched_port *port,
1157         struct rte_sched_subport *s,
1158         struct rte_sched_subport_params *params,
1159         uint32_t n_subports)
1160 {
1161         if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
1162                 return rte_sched_red_config(port, s, params, n_subports);
1163
1164         else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
1165                 return rte_sched_pie_config(port, s, params, n_subports);
1166
1167         return -EINVAL;
1168 }
1169 #endif
1170
1171 int
1172 rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
1173         uint32_t subport_id,
1174         bool tc_ov_enable)
1175 {
1176         struct rte_sched_subport *s;
1177
1178         if (port == NULL) {
1179                 RTE_LOG(ERR, SCHED,
1180                         "%s: Incorrect value for parameter port\n", __func__);
1181                 return -EINVAL;
1182         }
1183
1184         if (subport_id >= port->n_subports_per_port) {
1185                 RTE_LOG(ERR, SCHED,
1186                         "%s: Incorrect value for parameter subport id\n", __func__);
1187                 return  -EINVAL;
1188         }
1189
1190         s = port->subports[subport_id];
1191         s->tc_ov_enabled = tc_ov_enable ? 1 : 0;
1192
1193         return 0;
1194 }
1195
1196 int
1197 rte_sched_subport_config(struct rte_sched_port *port,
1198         uint32_t subport_id,
1199         struct rte_sched_subport_params *params,
1200         uint32_t subport_profile_id)
1201 {
1202         struct rte_sched_subport *s = NULL;
1203         uint32_t n_subports = subport_id;
1204         struct rte_sched_subport_profile *profile;
1205         uint32_t n_subport_pipe_queues, i;
1206         uint32_t size0, size1, bmp_mem_size;
1207         int status;
1208         int ret;
1209
1210         /* Check user parameters */
1211         if (port == NULL) {
1212                 RTE_LOG(ERR, SCHED,
1213                         "%s: Incorrect value for parameter port\n", __func__);
1214                 return 0;
1215         }
1216
1217         if (subport_id >= port->n_subports_per_port) {
1218                 RTE_LOG(ERR, SCHED,
1219                         "%s: Incorrect value for subport id\n", __func__);
1220                 ret = -EINVAL;
1221                 goto out;
1222         }
1223
1224         if (subport_profile_id >= port->n_max_subport_profiles) {
1225                 RTE_LOG(ERR, SCHED, "%s: "
1226                         "Number of subport profile exceeds the max limit\n",
1227                         __func__);
1228                 ret = -EINVAL;
1229                 goto out;
1230         }
1231
1232         /** Memory is allocated only on first invocation of the api for a
1233          * given subport. Subsequent invocation on same subport will just
1234          * update subport bandwidth parameter.
1235          **/
1236         if (port->subports[subport_id] == NULL) {
1237
1238                 status = rte_sched_subport_check_params(params,
1239                         port->n_pipes_per_subport,
1240                         port->rate);
1241                 if (status != 0) {
1242                         RTE_LOG(NOTICE, SCHED,
1243                                 "%s: Port scheduler params check failed (%d)\n",
1244                                 __func__, status);
1245                         ret = -EINVAL;
1246                         goto out;
1247                 }
1248
1249                 /* Determine the amount of memory to allocate */
1250                 size0 = sizeof(struct rte_sched_subport);
1251                 size1 = rte_sched_subport_get_array_base(params,
1252                                         e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
1253
1254                 /* Allocate memory to store the data structures */
1255                 s = rte_zmalloc_socket("subport_params", size0 + size1,
1256                         RTE_CACHE_LINE_SIZE, port->socket);
1257                 if (s == NULL) {
1258                         RTE_LOG(ERR, SCHED,
1259                                 "%s: Memory allocation fails\n", __func__);
1260                         ret = -ENOMEM;
1261                         goto out;
1262                 }
1263
1264                 n_subports++;
1265
1266                 subport_profile_id = 0;
1267
1268                 /* Port */
1269                 port->subports[subport_id] = s;
1270
1271                 s->tb_time = port->time;
1272
1273                 /* compile time checks */
1274                 RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
1275                 RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
1276                         (RTE_SCHED_PORT_N_GRINDERS - 1));
1277
1278                 /* User parameters */
1279                 s->n_pipes_per_subport_enabled =
1280                                 params->n_pipes_per_subport_enabled;
1281                 memcpy(s->qsize, params->qsize, sizeof(params->qsize));
1282                 s->n_pipe_profiles = params->n_pipe_profiles;
1283                 s->n_max_pipe_profiles = params->n_max_pipe_profiles;
1284
1285                 /* TC oversubscription is enabled by default */
1286                 s->tc_ov_enabled = 1;
1287
1288 #ifdef RTE_SCHED_CMAN
1289                 if (params->cman_params != NULL) {
1290                         s->cman_enabled = true;
1291                         status = rte_sched_cman_config(port, s, params, n_subports);
1292                         if (status) {
1293                                 RTE_LOG(NOTICE, SCHED,
1294                                         "%s: CMAN configuration fails\n", __func__);
1295                                 return status;
1296                         }
1297                 } else {
1298                         s->cman_enabled = false;
1299                 }
1300 #endif
1301
1302                 /* Scheduling loop detection */
1303                 s->pipe_loop = RTE_SCHED_PIPE_INVALID;
1304                 s->pipe_exhaustion = 0;
1305
1306                 /* Grinders */
1307                 s->busy_grinders = 0;
1308
1309                 /* Queue base calculation */
1310                 rte_sched_subport_config_qsize(s);
1311
1312                 /* Large data structures */
1313                 s->pipe = (struct rte_sched_pipe *)
1314                         (s->memory + rte_sched_subport_get_array_base(params,
1315                         e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
1316                 s->queue = (struct rte_sched_queue *)
1317                         (s->memory + rte_sched_subport_get_array_base(params,
1318                         e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
1319                 s->queue_extra = (struct rte_sched_queue_extra *)
1320                         (s->memory + rte_sched_subport_get_array_base(params,
1321                         e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
1322                 s->pipe_profiles = (struct rte_sched_pipe_profile *)
1323                         (s->memory + rte_sched_subport_get_array_base(params,
1324                         e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
1325                 s->bmp_array =  s->memory + rte_sched_subport_get_array_base(
1326                                 params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
1327                 s->queue_array = (struct rte_mbuf **)
1328                         (s->memory + rte_sched_subport_get_array_base(params,
1329                         e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));
1330
1331                 /* Pipe profile table */
1332                 rte_sched_subport_config_pipe_profile_table(s, params,
1333                                                             port->rate);
1334
1335                 /* Bitmap */
1336                 n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
1337                 bmp_mem_size = rte_bitmap_get_memory_footprint(
1338                                                 n_subport_pipe_queues);
1339                 s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
1340                                         bmp_mem_size);
1341                 if (s->bmp == NULL) {
1342                         RTE_LOG(ERR, SCHED,
1343                                 "%s: Subport bitmap init error\n", __func__);
1344                         ret = -EINVAL;
1345                         goto out;
1346                 }
1347
1348                 for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
1349                         s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
1350
1351                 /* TC oversubscription */
1352                 s->tc_ov_wm_min = port->mtu;
1353                 s->tc_ov_period_id = 0;
1354                 s->tc_ov = 0;
1355                 s->tc_ov_n = 0;
1356                 s->tc_ov_rate = 0;
1357         }
1358
1359         {
1360         /* update subport parameters from subport profile table*/
1361                 profile = port->subport_profiles + subport_profile_id;
1362
1363                 s = port->subports[subport_id];
1364
1365                 s->tb_credits = profile->tb_size / 2;
1366
1367                 s->tc_time = port->time + profile->tc_period;
1368
1369                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1370                         if (s->qsize[i])
1371                                 s->tc_credits[i] =
1372                                         profile->tc_credits_per_period[i];
1373                         else
1374                                 profile->tc_credits_per_period[i] = 0;
1375
1376                 s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
1377                                                         s->pipe_tc_be_rate_max);
1378                 s->tc_ov_wm = s->tc_ov_wm_max;
1379                 s->profile = subport_profile_id;
1380
1381         }
1382
1383         rte_sched_port_log_subport_profile(port, subport_profile_id);
1384
1385         return 0;
1386
1387 out:
1388         rte_sched_free_memory(port, n_subports);
1389
1390         return ret;
1391 }
1392
1393 int
1394 rte_sched_pipe_config(struct rte_sched_port *port,
1395         uint32_t subport_id,
1396         uint32_t pipe_id,
1397         int32_t pipe_profile)
1398 {
1399         struct rte_sched_subport *s;
1400         struct rte_sched_subport_profile *sp;
1401         struct rte_sched_pipe *p;
1402         struct rte_sched_pipe_profile *params;
1403         uint32_t n_subports = subport_id + 1;
1404         uint32_t deactivate, profile, i;
1405         int ret;
1406
1407         /* Check user parameters */
1408         profile = (uint32_t) pipe_profile;
1409         deactivate = (pipe_profile < 0);
1410
1411         if (port == NULL) {
1412                 RTE_LOG(ERR, SCHED,
1413                         "%s: Incorrect value for parameter port\n", __func__);
1414                 return -EINVAL;
1415         }
1416
1417         if (subport_id >= port->n_subports_per_port) {
1418                 RTE_LOG(ERR, SCHED,
1419                         "%s: Incorrect value for parameter subport id\n", __func__);
1420                 ret = -EINVAL;
1421                 goto out;
1422         }
1423
1424         s = port->subports[subport_id];
1425         if (pipe_id >= s->n_pipes_per_subport_enabled) {
1426                 RTE_LOG(ERR, SCHED,
1427                         "%s: Incorrect value for parameter pipe id\n", __func__);
1428                 ret = -EINVAL;
1429                 goto out;
1430         }
1431
1432         if (!deactivate && profile >= s->n_pipe_profiles) {
1433                 RTE_LOG(ERR, SCHED,
1434                         "%s: Incorrect value for parameter pipe profile\n", __func__);
1435                 ret = -EINVAL;
1436                 goto out;
1437         }
1438
1439         sp = port->subport_profiles + s->profile;
1440         /* Handle the case when pipe already has a valid configuration */
1441         p = s->pipe + pipe_id;
1442         if (p->tb_time) {
1443                 params = s->pipe_profiles + p->profile;
1444
1445                 double subport_tc_be_rate =
1446                 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1447                         / (double) sp->tc_period;
1448                 double pipe_tc_be_rate =
1449                         (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1450                         / (double) params->tc_period;
1451                 uint32_t tc_be_ov = s->tc_ov;
1452
1453                 /* Unplug pipe from its subport */
1454                 s->tc_ov_n -= params->tc_ov_weight;
1455                 s->tc_ov_rate -= pipe_tc_be_rate;
1456                 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1457
1458                 if (s->tc_ov != tc_be_ov) {
1459                         RTE_LOG(DEBUG, SCHED,
1460                                 "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
1461                                 subport_id, subport_tc_be_rate, s->tc_ov_rate);
1462                 }
1463
1464                 /* Reset the pipe */
1465                 memset(p, 0, sizeof(struct rte_sched_pipe));
1466         }
1467
1468         if (deactivate)
1469                 return 0;
1470
1471         /* Apply the new pipe configuration */
1472         p->profile = profile;
1473         params = s->pipe_profiles + p->profile;
1474
1475         /* Token Bucket (TB) */
1476         p->tb_time = port->time;
1477         p->tb_credits = params->tb_size / 2;
1478
1479         /* Traffic Classes (TCs) */
1480         p->tc_time = port->time + params->tc_period;
1481
1482         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1483                 if (s->qsize[i])
1484                         p->tc_credits[i] = params->tc_credits_per_period[i];
1485
1486         {
1487                 /* Subport best effort tc oversubscription */
1488                 double subport_tc_be_rate =
1489                 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1490                         / (double) sp->tc_period;
1491                 double pipe_tc_be_rate =
1492                         (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1493                         / (double) params->tc_period;
1494                 uint32_t tc_be_ov = s->tc_ov;
1495
1496                 s->tc_ov_n += params->tc_ov_weight;
1497                 s->tc_ov_rate += pipe_tc_be_rate;
1498                 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1499
1500                 if (s->tc_ov != tc_be_ov) {
1501                         RTE_LOG(DEBUG, SCHED,
1502                                 "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
1503                                 subport_id, subport_tc_be_rate, s->tc_ov_rate);
1504                 }
1505                 p->tc_ov_period_id = s->tc_ov_period_id;
1506                 p->tc_ov_credits = s->tc_ov_wm;
1507         }
1508
1509         return 0;
1510
1511 out:
1512         rte_sched_free_memory(port, n_subports);
1513
1514         return ret;
1515 }
1516
1517 int
1518 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
1519         uint32_t subport_id,
1520         struct rte_sched_pipe_params *params,
1521         uint32_t *pipe_profile_id)
1522 {
1523         struct rte_sched_subport *s;
1524         struct rte_sched_pipe_profile *pp;
1525         uint32_t i;
1526         int status;
1527
1528         /* Port */
1529         if (port == NULL) {
1530                 RTE_LOG(ERR, SCHED,
1531                         "%s: Incorrect value for parameter port\n", __func__);
1532                 return -EINVAL;
1533         }
1534
1535         /* Subport id not exceeds the max limit */
1536         if (subport_id > port->n_subports_per_port) {
1537                 RTE_LOG(ERR, SCHED,
1538                         "%s: Incorrect value for subport id\n", __func__);
1539                 return -EINVAL;
1540         }
1541
1542         s = port->subports[subport_id];
1543
1544         /* Pipe profiles exceeds the max limit */
1545         if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
1546                 RTE_LOG(ERR, SCHED,
1547                         "%s: Number of pipe profiles exceeds the max limit\n", __func__);
1548                 return -EINVAL;
1549         }
1550
1551         /* Pipe params */
1552         status = pipe_profile_check(params, port->rate, &s->qsize[0]);
1553         if (status != 0) {
1554                 RTE_LOG(ERR, SCHED,
1555                         "%s: Pipe profile check failed(%d)\n", __func__, status);
1556                 return -EINVAL;
1557         }
1558
1559         pp = &s->pipe_profiles[s->n_pipe_profiles];
1560         rte_sched_pipe_profile_convert(s, params, pp, port->rate);
1561
1562         /* Pipe profile should not exists */
1563         for (i = 0; i < s->n_pipe_profiles; i++)
1564                 if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
1565                         RTE_LOG(ERR, SCHED,
1566                                 "%s: Pipe profile exists\n", __func__);
1567                         return -EINVAL;
1568                 }
1569
1570         /* Pipe profile commit */
1571         *pipe_profile_id = s->n_pipe_profiles;
1572         s->n_pipe_profiles++;
1573
1574         if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
1575                 s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
1576
1577         rte_sched_port_log_pipe_profile(s, *pipe_profile_id);
1578
1579         return 0;
1580 }
1581
1582 int
1583 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
1584         struct rte_sched_subport_profile_params *params,
1585         uint32_t *subport_profile_id)
1586 {
1587         int status;
1588         uint32_t i;
1589         struct rte_sched_subport_profile *dst;
1590
1591         /* Port */
1592         if (port == NULL) {
1593                 RTE_LOG(ERR, SCHED, "%s: "
1594                 "Incorrect value for parameter port\n", __func__);
1595                 return -EINVAL;
1596         }
1597
1598         if (params == NULL) {
1599                 RTE_LOG(ERR, SCHED, "%s: "
1600                 "Incorrect value for parameter profile\n", __func__);
1601                 return -EINVAL;
1602         }
1603
1604         if (subport_profile_id == NULL) {
1605                 RTE_LOG(ERR, SCHED, "%s: "
1606                 "Incorrect value for parameter subport_profile_id\n",
1607                 __func__);
1608                 return -EINVAL;
1609         }
1610
1611         dst = port->subport_profiles + port->n_subport_profiles;
1612
1613         /* Subport profiles exceeds the max limit */
1614         if (port->n_subport_profiles >= port->n_max_subport_profiles) {
1615                 RTE_LOG(ERR, SCHED, "%s: "
1616                 "Number of subport profiles exceeds the max limit\n",
1617                  __func__);
1618                 return -EINVAL;
1619         }
1620
1621         status = subport_profile_check(params, port->rate);
1622         if (status != 0) {
1623                 RTE_LOG(ERR, SCHED,
1624                 "%s: subport profile check failed(%d)\n", __func__, status);
1625                 return -EINVAL;
1626         }
1627
1628         rte_sched_subport_profile_convert(params, dst, port->rate);
1629
1630         /* Subport profile should not exists */
1631         for (i = 0; i < port->n_subport_profiles; i++)
1632                 if (memcmp(port->subport_profiles + i,
1633                     dst, sizeof(*dst)) == 0) {
1634                         RTE_LOG(ERR, SCHED,
1635                         "%s: subport profile exists\n", __func__);
1636                         return -EINVAL;
1637                 }
1638
1639         /* Subport profile commit */
1640         *subport_profile_id = port->n_subport_profiles;
1641         port->n_subport_profiles++;
1642
1643         rte_sched_port_log_subport_profile(port, *subport_profile_id);
1644
1645         return 0;
1646 }
1647
1648 static inline uint32_t
1649 rte_sched_port_qindex(struct rte_sched_port *port,
1650         uint32_t subport,
1651         uint32_t pipe,
1652         uint32_t traffic_class,
1653         uint32_t queue)
1654 {
1655         return ((subport & (port->n_subports_per_port - 1)) <<
1656                 (port->n_pipes_per_subport_log2 + 4)) |
1657                 ((pipe &
1658                 (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
1659                 ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
1660                 (RTE_SCHED_QUEUES_PER_PIPE - 1));
1661 }
1662
1663 void
1664 rte_sched_port_pkt_write(struct rte_sched_port *port,
1665                          struct rte_mbuf *pkt,
1666                          uint32_t subport, uint32_t pipe,
1667                          uint32_t traffic_class,
1668                          uint32_t queue, enum rte_color color)
1669 {
1670         uint32_t queue_id =
1671                 rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
1672
1673         rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
1674 }
1675
1676 void
1677 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1678                                   const struct rte_mbuf *pkt,
1679                                   uint32_t *subport, uint32_t *pipe,
1680                                   uint32_t *traffic_class, uint32_t *queue)
1681 {
1682         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1683
1684         *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1685         *pipe = (queue_id >> 4) &
1686                 (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1687         *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1688         *queue = rte_sched_port_tc_queue(port, queue_id);
1689 }
1690
1691 enum rte_color
1692 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1693 {
1694         return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1695 }
1696
1697 int
1698 rte_sched_subport_read_stats(struct rte_sched_port *port,
1699                              uint32_t subport_id,
1700                              struct rte_sched_subport_stats *stats,
1701                              uint32_t *tc_ov)
1702 {
1703         struct rte_sched_subport *s;
1704
1705         /* Check user parameters */
1706         if (port == NULL) {
1707                 RTE_LOG(ERR, SCHED,
1708                         "%s: Incorrect value for parameter port\n", __func__);
1709                 return -EINVAL;
1710         }
1711
1712         if (subport_id >= port->n_subports_per_port) {
1713                 RTE_LOG(ERR, SCHED,
1714                         "%s: Incorrect value for subport id\n", __func__);
1715                 return -EINVAL;
1716         }
1717
1718         if (stats == NULL) {
1719                 RTE_LOG(ERR, SCHED,
1720                         "%s: Incorrect value for parameter stats\n", __func__);
1721                 return -EINVAL;
1722         }
1723
1724         if (tc_ov == NULL) {
1725                 RTE_LOG(ERR, SCHED,
1726                         "%s: Incorrect value for tc_ov\n", __func__);
1727                 return -EINVAL;
1728         }
1729
1730         s = port->subports[subport_id];
1731
1732         /* Copy subport stats and clear */
1733         memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1734         memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1735
1736         /* Subport TC oversubscription status */
1737         *tc_ov = s->tc_ov;
1738
1739         return 0;
1740 }
1741
1742 int
1743 rte_sched_queue_read_stats(struct rte_sched_port *port,
1744         uint32_t queue_id,
1745         struct rte_sched_queue_stats *stats,
1746         uint16_t *qlen)
1747 {
1748         struct rte_sched_subport *s;
1749         struct rte_sched_queue *q;
1750         struct rte_sched_queue_extra *qe;
1751         uint32_t subport_id, subport_qmask, subport_qindex;
1752
1753         /* Check user parameters */
1754         if (port == NULL) {
1755                 RTE_LOG(ERR, SCHED,
1756                         "%s: Incorrect value for parameter port\n", __func__);
1757                 return -EINVAL;
1758         }
1759
1760         if (queue_id >= rte_sched_port_queues_per_port(port)) {
1761                 RTE_LOG(ERR, SCHED,
1762                         "%s: Incorrect value for queue id\n", __func__);
1763                 return -EINVAL;
1764         }
1765
1766         if (stats == NULL) {
1767                 RTE_LOG(ERR, SCHED,
1768                         "%s: Incorrect value for parameter stats\n", __func__);
1769                 return -EINVAL;
1770         }
1771
1772         if (qlen == NULL) {
1773                 RTE_LOG(ERR, SCHED,
1774                         "%s: Incorrect value for parameter qlen\n", __func__);
1775                 return -EINVAL;
1776         }
1777         subport_qmask = port->n_pipes_per_subport_log2 + 4;
1778         subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1);
1779
1780         s = port->subports[subport_id];
1781         subport_qindex = ((1 << subport_qmask) - 1) & queue_id;
1782         q = s->queue + subport_qindex;
1783         qe = s->queue_extra + subport_qindex;
1784
1785         /* Copy queue stats and clear */
1786         memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1787         memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1788
1789         /* Queue length */
1790         *qlen = q->qw - q->qr;
1791
1792         return 0;
1793 }
1794
1795 #ifdef RTE_SCHED_DEBUG
1796
1797 static inline int
1798 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1799         uint32_t qindex)
1800 {
1801         struct rte_sched_queue *queue = subport->queue + qindex;
1802
1803         return queue->qr == queue->qw;
1804 }
1805
1806 #endif /* RTE_SCHED_DEBUG */
1807
1808 static inline void
1809 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1810         struct rte_sched_subport *subport,
1811         uint32_t qindex,
1812         struct rte_mbuf *pkt)
1813 {
1814         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1815         uint32_t pkt_len = pkt->pkt_len;
1816
1817         subport->stats.n_pkts_tc[tc_index] += 1;
1818         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1819 }
1820
1821 static inline void
1822 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1823         struct rte_sched_subport *subport,
1824         uint32_t qindex,
1825         struct rte_mbuf *pkt,
1826         __rte_unused uint32_t n_pkts_cman_dropped)
1827 {
1828         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1829         uint32_t pkt_len = pkt->pkt_len;
1830
1831         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1832         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1833         subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
1834 }
1835
1836 static inline void
1837 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1838         uint32_t qindex,
1839         struct rte_mbuf *pkt)
1840 {
1841         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1842         uint32_t pkt_len = pkt->pkt_len;
1843
1844         qe->stats.n_pkts += 1;
1845         qe->stats.n_bytes += pkt_len;
1846 }
1847
1848 static inline void
1849 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1850         uint32_t qindex,
1851         struct rte_mbuf *pkt,
1852         __rte_unused uint32_t n_pkts_cman_dropped)
1853 {
1854         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1855         uint32_t pkt_len = pkt->pkt_len;
1856
1857         qe->stats.n_pkts_dropped += 1;
1858         qe->stats.n_bytes_dropped += pkt_len;
1859 #ifdef RTE_SCHED_CMAN
1860         if (subport->cman_enabled)
1861                 qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
1862 #endif
1863 }
1864
1865 #ifdef RTE_SCHED_CMAN
1866
/* Run the configured congestion-management algorithm (RED or PIE) for the
 * packet about to be enqueued on queue qindex, whose current length is qlen.
 * Returns non-zero when the algorithm decides the packet should be dropped,
 * 0 to admit it (or when CMAN is disabled on this subport).
 */
static inline int
rte_sched_port_cman_drop(struct rte_sched_port *port,
	struct rte_sched_subport *subport,
	struct rte_mbuf *pkt,
	uint32_t qindex,
	uint16_t qlen)
{
	if (!subport->cman_enabled)
		return 0;

	struct rte_sched_queue_extra *qe;
	uint32_t tc_index;

	tc_index = rte_sched_port_pipe_tc(port, qindex);
	qe = subport->queue_extra + qindex;

	/* RED */
	if (subport->cman == RTE_SCHED_CMAN_RED) {
		struct rte_red_config *red_cfg;
		struct rte_red *red;
		enum rte_color color;

		/* RED config is selected per (traffic class, packet color) */
		color = rte_sched_port_pkt_read_color(pkt);
		red_cfg = &subport->red_config[tc_index][color];

		/* Both thresholds zero means RED is unconfigured for this
		 * (tc, color) pair - admit unconditionally.
		 */
		if ((red_cfg->min_th | red_cfg->max_th) == 0)
			return 0;

		red = &qe->red;

		return rte_red_enqueue(red_cfg, red, qlen, port->time);
	}

	/* PIE - the only other CMAN mode; configured per traffic class.
	 * Note PIE uses CPU-cycle time, while RED uses port byte-time.
	 */
	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
	struct rte_pie *pie = &qe->pie;

	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
}
1906
1907 static inline void
1908 rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
1909         struct rte_sched_subport *subport, uint32_t qindex)
1910 {
1911         if (subport->cman_enabled) {
1912                 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1913                 if (subport->cman == RTE_SCHED_CMAN_RED) {
1914                         struct rte_red *red = &qe->red;
1915
1916                         rte_red_mark_queue_empty(red, port->time);
1917                 }
1918         }
1919 }
1920
1921 static inline void
1922 rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
1923 uint32_t qindex, uint32_t pkt_len, uint64_t time) {
1924         if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
1925                 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1926                 struct rte_pie *pie = &qe->pie;
1927
1928                 /* Update queue length */
1929                 pie->qlen -= 1;
1930                 pie->qlen_bytes -= pkt_len;
1931
1932                 rte_pie_dequeue(pie, pkt_len, time);
1933         }
1934 }
1935
1936 #else
1937
/* Stub: with RTE_SCHED_CMAN compiled out, congestion management never
 * drops a packet.
 */
static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
	struct rte_sched_subport *subport __rte_unused,
	struct rte_mbuf *pkt __rte_unused,
	uint32_t qindex __rte_unused,
	uint16_t qlen __rte_unused)
{
	return 0;
}
1946
/* Stub: queue-empty timestamping is only needed by RED; expands to nothing
 * when RTE_SCHED_CMAN is compiled out.
 */
#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
1948
/* Stub: PIE dequeue bookkeeping is a no-op when RTE_SCHED_CMAN is
 * compiled out.
 */
static inline void
rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
	uint32_t qindex __rte_unused,
	uint32_t pkt_len __rte_unused,
	uint64_t time __rte_unused) {
	/* do-nothing when RTE_SCHED_CMAN not defined */
}
1956
1957 #endif /* RTE_SCHED_CMAN */
1958
1959 #ifdef RTE_SCHED_DEBUG
1960
/* Debug-only consistency check: every bit set in a bitmap slab must
 * correspond to a non-empty queue. Panics (with a diagnostic dump) on the
 * first inconsistent slab.
 */
static inline void
debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
		       uint64_t bmp_slab)
{
	uint64_t mask;
	uint32_t i, panic;

	/* A slab returned by the bitmap scan must have at least one bit set */
	if (bmp_slab == 0)
		rte_panic("Empty slab at position %u\n", bmp_pos);

	panic = 0;
	for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
		if (mask & bmp_slab) {
			if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
				printf("Queue %u (slab offset %u) is empty\n",
					bmp_pos + i, i);
				panic = 1;
			}
		}
	}

	/* Fix: added the missing space before "starting" - the old format
	 * string printed "0x<slab>starting at position".
	 */
	if (panic)
		rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
			bmp_slab, bmp_pos);
}
1985
1986 #endif /* RTE_SCHED_DEBUG */
1987
1988 static inline struct rte_sched_subport *
1989 rte_sched_port_subport(struct rte_sched_port *port,
1990         struct rte_mbuf *pkt)
1991 {
1992         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1993         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1994
1995         return port->subports[subport_id];
1996 }
1997
1998 static inline uint32_t
1999 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
2000         struct rte_mbuf *pkt, uint32_t subport_qmask)
2001 {
2002         struct rte_sched_queue *q;
2003         struct rte_sched_queue_extra *qe;
2004         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
2005         uint32_t subport_queue_id = subport_qmask & qindex;
2006
2007         q = subport->queue + subport_queue_id;
2008         rte_prefetch0(q);
2009         qe = subport->queue_extra + subport_queue_id;
2010         rte_prefetch0(qe);
2011
2012         return subport_queue_id;
2013 }
2014
2015 static inline void
2016 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
2017         struct rte_sched_subport *subport,
2018         uint32_t qindex,
2019         struct rte_mbuf **qbase)
2020 {
2021         struct rte_sched_queue *q;
2022         struct rte_mbuf **q_qw;
2023         uint16_t qsize;
2024
2025         q = subport->queue + qindex;
2026         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2027         q_qw = qbase + (q->qw & (qsize - 1));
2028
2029         rte_prefetch0(q_qw);
2030         rte_bitmap_prefetch0(subport->bmp, qindex);
2031 }
2032
2033 static inline int
2034 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
2035         struct rte_sched_subport *subport,
2036         uint32_t qindex,
2037         struct rte_mbuf **qbase,
2038         struct rte_mbuf *pkt)
2039 {
2040         struct rte_sched_queue *q;
2041         uint16_t qsize;
2042         uint16_t qlen;
2043
2044         q = subport->queue + qindex;
2045         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2046         qlen = q->qw - q->qr;
2047
2048         /* Drop the packet (and update drop stats) when queue is full */
2049         if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
2050                      (qlen >= qsize))) {
2051                 rte_pktmbuf_free(pkt);
2052                 rte_sched_port_update_subport_stats_on_drop(port, subport,
2053                         qindex, pkt, qlen < qsize);
2054                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
2055                         qlen < qsize);
2056                 return 0;
2057         }
2058
2059         /* Enqueue packet */
2060         qbase[q->qw & (qsize - 1)] = pkt;
2061         q->qw++;
2062
2063         /* Activate queue in the subport bitmap */
2064         rte_bitmap_set(subport->bmp, qindex);
2065
2066         /* Statistics */
2067         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
2068         rte_sched_port_update_queue_stats(subport, qindex, pkt);
2069
2070         return 1;
2071 }
2072
2073
/*
 * The enqueue function implements a 4-level pipeline with each stage
 * processing two different packets. The purpose of using a pipeline
 * is to hide the latency of prefetching the data structures. The
 * naming convention is presented in the diagram below:
 *
 *   p00  _______   p10  _______   p20  _______   p30  _______
 * ----->|       |----->|       |----->|       |----->|       |----->
 *       |   0   |      |   1   |      |   2   |      |   3   |
 * ----->|_______|----->|_______|----->|_______|----->|_______|----->
 *   p01            p11            p21            p31
 *
 * Stage 0 prefetches the mbuf, stage 1 prefetches the subport and queue
 * pointer structures, stage 2 prefetches the queue write location, and
 * stage 3 writes the packet into its queue. Returns the number of packets
 * successfully enqueued; dropped packets are freed internally.
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
		       uint32_t n_pkts)
{
	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
		*pkt30, *pkt31, *pkt_last;
	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
		**q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
	struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
		*subport20, *subport21, *subport30, *subport31, *subport_last;
	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
	uint32_t subport_qmask;
	uint32_t result, i;

	result = 0;
	/* Mask extracting the subport-relative queue id from a flat queue id */
	subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;

	/*
	 * Less than 6 input packets available, which is not enough to
	 * feed the pipeline: process the batch stage-by-stage over small
	 * arrays instead.
	 */
	if (unlikely(n_pkts < 6)) {
		struct rte_sched_subport *subports[5];
		struct rte_mbuf **q_base[5];
		uint32_t q[5];

		/* Prefetch the mbuf structure of each packet */
		for (i = 0; i < n_pkts; i++)
			rte_prefetch0(pkts[i]);

		/* Prefetch the subport structure for each packet */
		for (i = 0; i < n_pkts; i++)
			subports[i] = rte_sched_port_subport(port, pkts[i]);

		/* Prefetch the queue structure for each queue */
		for (i = 0; i < n_pkts; i++)
			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
					pkts[i], subport_qmask);

		/* Prefetch the write pointer location of each queue */
		for (i = 0; i < n_pkts; i++) {
			q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
			rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
				q[i], q_base[i]);
		}

		/* Write each packet to its queue */
		for (i = 0; i < n_pkts; i++)
			result += rte_sched_port_enqueue_qwa(port, subports[i],
						q[i], q_base[i], pkts[i]);

		return result;
	}

	/* Feed the first 3 stages of the pipeline (6 packets needed) */
	pkt20 = pkts[0];
	pkt21 = pkts[1];
	rte_prefetch0(pkt20);
	rte_prefetch0(pkt21);

	pkt10 = pkts[2];
	pkt11 = pkts[3];
	rte_prefetch0(pkt10);
	rte_prefetch0(pkt11);

	subport20 = rte_sched_port_subport(port, pkt20);
	subport21 = rte_sched_port_subport(port, pkt21);
	q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
			pkt20, subport_qmask);
	q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
			pkt21, subport_qmask);

	pkt00 = pkts[4];
	pkt01 = pkts[5];
	rte_prefetch0(pkt00);
	rte_prefetch0(pkt01);

	subport10 = rte_sched_port_subport(port, pkt10);
	subport11 = rte_sched_port_subport(port, pkt11);
	q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
			pkt10, subport_qmask);
	q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
			pkt11, subport_qmask);

	q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
	q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);

	/* Run the pipeline: two packets advance one stage per iteration */
	for (i = 6; i < (n_pkts & (~1)); i += 2) {
		/* Propagate stage inputs */
		pkt30 = pkt20;
		pkt31 = pkt21;
		pkt20 = pkt10;
		pkt21 = pkt11;
		pkt10 = pkt00;
		pkt11 = pkt01;
		q30 = q20;
		q31 = q21;
		q20 = q10;
		q21 = q11;
		subport30 = subport20;
		subport31 = subport21;
		subport20 = subport10;
		subport21 = subport11;
		q30_base = q20_base;
		q31_base = q21_base;

		/* Stage 0: Get packets in */
		pkt00 = pkts[i];
		pkt01 = pkts[i + 1];
		rte_prefetch0(pkt00);
		rte_prefetch0(pkt01);

		/* Stage 1: Prefetch subport and queue structure storing queue pointers */
		subport10 = rte_sched_port_subport(port, pkt10);
		subport11 = rte_sched_port_subport(port, pkt11);
		q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
				pkt10, subport_qmask);
		q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
				pkt11, subport_qmask);

		/* Stage 2: Prefetch queue write location */
		q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
		q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
		rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
		rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);

		/* Stage 3: Write packet to queue and activate queue */
		r30 = rte_sched_port_enqueue_qwa(port, subport30,
				q30, q30_base, pkt30);
		r31 = rte_sched_port_enqueue_qwa(port, subport31,
				q31, q31_base, pkt31);
		result += r30 + r31;
	}

	/*
	 * Drain the pipeline (exactly 6 packets).
	 * Handle the last packet in the case
	 * of an odd number of input packets.
	 */
	pkt_last = pkts[n_pkts - 1];
	rte_prefetch0(pkt_last);

	subport00 = rte_sched_port_subport(port, pkt00);
	subport01 = rte_sched_port_subport(port, pkt01);
	q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
			pkt00, subport_qmask);
	q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
			pkt01, subport_qmask);

	q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
	q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);

	r20 = rte_sched_port_enqueue_qwa(port, subport20,
			q20, q20_base, pkt20);
	r21 = rte_sched_port_enqueue_qwa(port, subport21,
			q21, q21_base, pkt21);
	result += r20 + r21;

	/* The last (odd) packet is threaded through the same prefetch
	 * sequence as the pipeline stages, interleaved with the drain.
	 */
	subport_last = rte_sched_port_subport(port, pkt_last);
	q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
				pkt_last, subport_qmask);

	q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
	q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);

	r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
			q10_base, pkt10);
	r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
			q11_base, pkt11);
	result += r10 + r11;

	q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
	rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
		q_last, q_last_base);

	r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
			q00_base, pkt00);
	r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
			q01_base, pkt01);
	result += r00 + r01;

	/* Odd packet count: pkt_last was not covered by the even pairs above */
	if (n_pkts & 1) {
		r_last = rte_sched_port_enqueue_qwa(port, subport_last,
					q_last, q_last_base, pkt_last);
		result += r_last;
	}

	return result;
}
2284
/* Recompute the subport's TC oversubscription watermark (tc_ov_wm) for the
 * best-effort (BE) traffic class. The watermark adapts by roughly 1/128 per
 * TC period: it shrinks when the higher-priority classes left too little
 * spare BE bandwidth for another MTU-sized packet, and grows otherwise.
 * Returns the new watermark value, clamped to [tc_ov_wm_min, tc_ov_wm_max].
 */
static inline uint64_t
grinder_tc_ov_credits_update(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_subport_profile *sp = grinder->subport_params;
	uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint64_t tc_consumption = 0, tc_ov_consumption_max;
	uint64_t tc_ov_wm = subport->tc_ov_wm;
	uint32_t i;

	/* Oversubscription disabled on this subport: no adaptation needed */
	if (subport->tc_ov == 0)
		return subport->tc_ov_wm_max;

	/* Credits consumed this period by each strict-priority TC
	 * (allocated minus remaining), summed over all non-BE classes.
	 */
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
		tc_ov_consumption[i] = sp->tc_credits_per_period[i]
					-  subport->tc_credits[i];
		tc_consumption += tc_ov_consumption[i];
	}

	/* Credits consumed by the BE class itself this period */
	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
	sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];

	/* Maximum credits BE could have consumed: its allocation minus what
	 * the higher-priority classes took.
	 */
	tc_ov_consumption_max =
	sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
			tc_consumption;

	/* BE consumed (nearly) all it could - within one MTU of the cap:
	 * congestion, so decrease the watermark by ~1/128 (floor at min).
	 */
	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
		(tc_ov_consumption_max - port->mtu)) {
		tc_ov_wm  -= tc_ov_wm >> 7;
		if (tc_ov_wm < subport->tc_ov_wm_min)
			tc_ov_wm = subport->tc_ov_wm_min;

		return tc_ov_wm;
	}

	/* Spare BE capacity: increase the watermark by ~1/128 (cap at max) */
	tc_ov_wm += (tc_ov_wm >> 7) + 1;
	if (tc_ov_wm > subport->tc_ov_wm_max)
		tc_ov_wm = subport->tc_ov_wm_max;

	return tc_ov_wm;
}
2328
2329 static inline void
2330 grinder_credits_update(struct rte_sched_port *port,
2331         struct rte_sched_subport *subport, uint32_t pos)
2332 {
2333         struct rte_sched_grinder *grinder = subport->grinder + pos;
2334         struct rte_sched_pipe *pipe = grinder->pipe;
2335         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2336         struct rte_sched_subport_profile *sp = grinder->subport_params;
2337         uint64_t n_periods;
2338         uint32_t i;
2339
2340         /* Subport TB */
2341         n_periods = (port->time - subport->tb_time) / sp->tb_period;
2342         subport->tb_credits += n_periods * sp->tb_credits_per_period;
2343         subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2344         subport->tb_time += n_periods * sp->tb_period;
2345
2346         /* Pipe TB */
2347         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2348         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2349         pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2350         pipe->tb_time += n_periods * params->tb_period;
2351
2352         /* Subport TCs */
2353         if (unlikely(port->time >= subport->tc_time)) {
2354                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2355                         subport->tc_credits[i] = sp->tc_credits_per_period[i];
2356
2357                 subport->tc_time = port->time + sp->tc_period;
2358         }
2359
2360         /* Pipe TCs */
2361         if (unlikely(port->time >= pipe->tc_time)) {
2362                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2363                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2364                 pipe->tc_time = port->time + params->tc_period;
2365         }
2366 }
2367
/* Same as grinder_credits_update(), plus best-effort TC oversubscription
 * handling: the subport watermark is re-adapted each subport TC period, and
 * pipes lazily pick up their weighted share of it when their period id
 * falls behind the subport's.
 */
static inline void
grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_pipe *pipe = grinder->pipe;
	struct rte_sched_pipe_profile *params = grinder->pipe_params;
	struct rte_sched_subport_profile *sp = grinder->subport_params;
	uint64_t n_periods;
	uint32_t i;

	/* Subport TB: refill for elapsed whole periods, cap at bucket size */
	n_periods = (port->time - subport->tb_time) / sp->tb_period;
	subport->tb_credits += n_periods * sp->tb_credits_per_period;
	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
	subport->tb_time += n_periods * sp->tb_period;

	/* Pipe TB: same refill scheme at pipe granularity */
	n_periods = (port->time - pipe->tb_time) / params->tb_period;
	pipe->tb_credits += n_periods * params->tb_credits_per_period;
	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
	pipe->tb_time += n_periods * params->tb_period;

	/* Subport TCs: on period expiry, adapt the oversubscription
	 * watermark (must happen before tc_credits are reset, since the
	 * adaptation reads the leftover credits), then reset allocations
	 * and bump the period id so pipes refresh their OV credits.
	 */
	if (unlikely(port->time >= subport->tc_time)) {
		subport->tc_ov_wm =
			grinder_tc_ov_credits_update(port, subport, pos);

		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
			subport->tc_credits[i] = sp->tc_credits_per_period[i];

		subport->tc_time = port->time + sp->tc_period;
		subport->tc_ov_period_id++;
	}

	/* Pipe TCs: reset each class to its full per-period allocation */
	if (unlikely(port->time >= pipe->tc_time)) {
		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
			pipe->tc_credits[i] = params->tc_credits_per_period[i];
		pipe->tc_time = port->time + params->tc_period;
	}

	/* Pipe TCs - Oversubscription: once per subport TC period, grant the
	 * pipe its weighted share of the subport watermark.
	 */
	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
		pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;

		pipe->tc_ov_period_id = subport->tc_ov_period_id;
	}
}
2417
2418 static inline int
2419 grinder_credits_check(struct rte_sched_port *port,
2420         struct rte_sched_subport *subport, uint32_t pos)
2421 {
2422         struct rte_sched_grinder *grinder = subport->grinder + pos;
2423         struct rte_sched_pipe *pipe = grinder->pipe;
2424         struct rte_mbuf *pkt = grinder->pkt;
2425         uint32_t tc_index = grinder->tc_index;
2426         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2427         uint64_t subport_tb_credits = subport->tb_credits;
2428         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2429         uint64_t pipe_tb_credits = pipe->tb_credits;
2430         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2431         int enough_credits;
2432
2433         /* Check pipe and subport credits */
2434         enough_credits = (pkt_len <= subport_tb_credits) &&
2435                 (pkt_len <= subport_tc_credits) &&
2436                 (pkt_len <= pipe_tb_credits) &&
2437                 (pkt_len <= pipe_tc_credits);
2438
2439         if (!enough_credits)
2440                 return 0;
2441
2442         /* Update pipe and subport credits */
2443         subport->tb_credits -= pkt_len;
2444         subport->tc_credits[tc_index] -= pkt_len;
2445         pipe->tb_credits -= pkt_len;
2446         pipe->tc_credits[tc_index] -= pkt_len;
2447
2448         return 1;
2449 }
2450
2451 static inline int
2452 grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
2453         struct rte_sched_subport *subport, uint32_t pos)
2454 {
2455         struct rte_sched_grinder *grinder = subport->grinder + pos;
2456         struct rte_sched_pipe *pipe = grinder->pipe;
2457         struct rte_mbuf *pkt = grinder->pkt;
2458         uint32_t tc_index = grinder->tc_index;
2459         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2460         uint64_t subport_tb_credits = subport->tb_credits;
2461         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2462         uint64_t pipe_tb_credits = pipe->tb_credits;
2463         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2464         uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2465         uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2466         uint64_t pipe_tc_ov_credits;
2467         uint32_t i;
2468         int enough_credits;
2469
2470         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2471                 pipe_tc_ov_mask1[i] = ~0LLU;
2472
2473         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2474         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
2475         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2476
2477         /* Check pipe and subport credits */
2478         enough_credits = (pkt_len <= subport_tb_credits) &&
2479                 (pkt_len <= subport_tc_credits) &&
2480                 (pkt_len <= pipe_tb_credits) &&
2481                 (pkt_len <= pipe_tc_credits) &&
2482                 (pkt_len <= pipe_tc_ov_credits);
2483
2484         if (!enough_credits)
2485                 return 0;
2486
2487         /* Update pipe and subport credits */
2488         subport->tb_credits -= pkt_len;
2489         subport->tc_credits[tc_index] -= pkt_len;
2490         pipe->tb_credits -= pkt_len;
2491         pipe->tc_credits[tc_index] -= pkt_len;
2492         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2493
2494         return 1;
2495 }
2496
2497
/*
 * Try to transmit the packet at the head of grinder pos.
 * Returns 1 when the packet passed the credit check and was appended to
 * port->pkts_out (queue read pointer advanced, port time advanced by the
 * frame length), 0 when credits were insufficient and nothing changed.
 */
static inline int
grinder_schedule(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
	uint32_t qindex = grinder->qindex[grinder->qpos];
	struct rte_mbuf *pkt = grinder->pkt;
	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
	uint32_t be_tc_active;

	/* Credit check variant depends on whether oversubscription is on */
	if (subport->tc_ov_enabled) {
		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
			return 0;
	} else {
		if (!grinder_credits_check(port, subport, pos))
			return 0;
	}

	/* Advance port time */
	port->time += pkt_len;

	/* Send packet */
	port->pkts_out[port->n_pkts_out++] = pkt;
	queue->qr++;

	/* Branchless: WRR tokens accrue only for the best-effort TC
	 * (be_tc_active is an all-ones mask for BE, zero otherwise)
	 */
	be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
	grinder->wrr_tokens[grinder->qpos] +=
		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;

	/* Queue drained: drop it from the bitmap, the grinder queue mask
	 * and (for BE) the WRR active mask
	 */
	if (queue->qr == queue->qw) {
		rte_bitmap_clear(subport->bmp, qindex);
		grinder->qmask &= ~(1 << grinder->qpos);
		if (be_tc_active)
			grinder->wrr_mask[grinder->qpos] = 0;

		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
	}

	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);

	/* Reset pipe loop detection */
	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
	grinder->productive = 1;

	return 1;
}
2545
2546 static inline int
2547 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2548 {
2549         uint32_t i;
2550
2551         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2552                 if (subport->grinder_base_bmp_pos[i] == base_pipe)
2553                         return 1;
2554         }
2555
2556         return 0;
2557 }
2558
2559 static inline void
2560 grinder_pcache_populate(struct rte_sched_subport *subport,
2561         uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2562 {
2563         struct rte_sched_grinder *grinder = subport->grinder + pos;
2564         uint16_t w[4];
2565
2566         grinder->pcache_w = 0;
2567         grinder->pcache_r = 0;
2568
2569         w[0] = (uint16_t) bmp_slab;
2570         w[1] = (uint16_t) (bmp_slab >> 16);
2571         w[2] = (uint16_t) (bmp_slab >> 32);
2572         w[3] = (uint16_t) (bmp_slab >> 48);
2573
2574         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2575         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2576         grinder->pcache_w += (w[0] != 0);
2577
2578         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2579         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2580         grinder->pcache_w += (w[1] != 0);
2581
2582         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2583         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2584         grinder->pcache_w += (w[2] != 0);
2585
2586         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2587         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2588         grinder->pcache_w += (w[3] != 0);
2589 }
2590
2591 static inline void
2592 grinder_tccache_populate(struct rte_sched_subport *subport,
2593         uint32_t pos, uint32_t qindex, uint16_t qmask)
2594 {
2595         struct rte_sched_grinder *grinder = subport->grinder + pos;
2596         uint8_t b, i;
2597
2598         grinder->tccache_w = 0;
2599         grinder->tccache_r = 0;
2600
2601         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2602                 b = (uint8_t) ((qmask >> i) & 0x1);
2603                 grinder->tccache_qmask[grinder->tccache_w] = b;
2604                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2605                 grinder->tccache_w += (b != 0);
2606         }
2607
2608         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2609         grinder->tccache_qmask[grinder->tccache_w] = b;
2610         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2611                 RTE_SCHED_TRAFFIC_CLASS_BE;
2612         grinder->tccache_w += (b != 0);
2613 }
2614
2615 static inline int
2616 grinder_next_tc(struct rte_sched_port *port,
2617         struct rte_sched_subport *subport, uint32_t pos)
2618 {
2619         struct rte_sched_grinder *grinder = subport->grinder + pos;
2620         struct rte_mbuf **qbase;
2621         uint32_t qindex;
2622         uint16_t qsize;
2623
2624         if (grinder->tccache_r == grinder->tccache_w)
2625                 return 0;
2626
2627         qindex = grinder->tccache_qindex[grinder->tccache_r];
2628         qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2629         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2630
2631         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2632         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2633         grinder->qsize = qsize;
2634
2635         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2636                 grinder->queue[0] = subport->queue + qindex;
2637                 grinder->qbase[0] = qbase;
2638                 grinder->qindex[0] = qindex;
2639                 grinder->tccache_r++;
2640
2641                 return 1;
2642         }
2643
2644         grinder->queue[0] = subport->queue + qindex;
2645         grinder->queue[1] = subport->queue + qindex + 1;
2646         grinder->queue[2] = subport->queue + qindex + 2;
2647         grinder->queue[3] = subport->queue + qindex + 3;
2648
2649         grinder->qbase[0] = qbase;
2650         grinder->qbase[1] = qbase + qsize;
2651         grinder->qbase[2] = qbase + 2 * qsize;
2652         grinder->qbase[3] = qbase + 3 * qsize;
2653
2654         grinder->qindex[0] = qindex;
2655         grinder->qindex[1] = qindex + 1;
2656         grinder->qindex[2] = qindex + 2;
2657         grinder->qindex[3] = qindex + 3;
2658
2659         grinder->tccache_r++;
2660         return 1;
2661 }
2662
/*
 * Install the next pipe to be serviced into grinder pos.
 * Pipes are drawn from the grinder's pipe cache; when the cache is empty
 * a new non-empty pipe group is pulled from the subport queue bitmap.
 * Returns 1 when a pipe was installed (TC cache repopulated, first TC
 * loaded), 0 when no work is available or the scanned pipe group is
 * already held by another grinder.
 */
static inline int
grinder_next_pipe(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	uint32_t pipe_qindex;
	uint16_t pipe_qmask;

	/* Serve pending entries from the grinder's pipe cache first */
	if (grinder->pcache_r < grinder->pcache_w) {
		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
		grinder->pcache_r++;
	} else {
		uint64_t bmp_slab = 0;
		uint32_t bmp_pos = 0;

		/* Get another non-empty pipe group */
		if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
			return 0;

#ifdef RTE_SCHED_DEBUG
		debug_check_queue_slab(subport, bmp_pos, bmp_slab);
#endif

		/* Return if pipe group already in one of the other grinders */
		subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
		if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
			return 0;

		subport->grinder_base_bmp_pos[pos] = bmp_pos;

		/* Install new pipe group into grinder's pipe cache */
		grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);

		pipe_qmask = grinder->pcache_qmask[0];
		pipe_qindex = grinder->pcache_qindex[0];
		grinder->pcache_r = 1;
	}

	/* Install new pipe in the grinder.
	 * Queue indices carry 16 queues per pipe, hence pipe index = qindex >> 4.
	 */
	grinder->pindex = pipe_qindex >> 4;
	grinder->subport = subport;
	grinder->pipe = subport->pipe + grinder->pindex;
	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
	grinder->productive = 0;

	grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
	grinder_next_tc(port, subport, pos);

	/* Check for pipe exhaustion: arriving back at the marked pipe means
	 * a full unproductive loop over all active pipes was completed
	 */
	if (grinder->pindex == subport->pipe_loop) {
		subport->pipe_exhaustion = 1;
		subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
	}

	return 1;
}
2720
2721
2722 static inline void
2723 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2724 {
2725         struct rte_sched_grinder *grinder = subport->grinder + pos;
2726         struct rte_sched_pipe *pipe = grinder->pipe;
2727         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2728         uint32_t qmask = grinder->qmask;
2729
2730         grinder->wrr_tokens[0] =
2731                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2732         grinder->wrr_tokens[1] =
2733                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2734         grinder->wrr_tokens[2] =
2735                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2736         grinder->wrr_tokens[3] =
2737                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2738
2739         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2740         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2741         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2742         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2743
2744         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2745         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2746         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2747         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2748 }
2749
2750 static inline void
2751 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2752 {
2753         struct rte_sched_grinder *grinder = subport->grinder + pos;
2754         struct rte_sched_pipe *pipe = grinder->pipe;
2755
2756         pipe->wrr_tokens[0] =
2757                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2758                                 RTE_SCHED_WRR_SHIFT;
2759         pipe->wrr_tokens[1] =
2760                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2761                                 RTE_SCHED_WRR_SHIFT;
2762         pipe->wrr_tokens[2] =
2763                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2764                                 RTE_SCHED_WRR_SHIFT;
2765         pipe->wrr_tokens[3] =
2766                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2767                                 RTE_SCHED_WRR_SHIFT;
2768 }
2769
2770 static inline void
2771 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2772 {
2773         struct rte_sched_grinder *grinder = subport->grinder + pos;
2774         uint16_t wrr_tokens_min;
2775
2776         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2777         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2778         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2779         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2780
2781         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2782         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2783
2784         grinder->wrr_tokens[0] -= wrr_tokens_min;
2785         grinder->wrr_tokens[1] -= wrr_tokens_min;
2786         grinder->wrr_tokens[2] -= wrr_tokens_min;
2787         grinder->wrr_tokens[3] -= wrr_tokens_min;
2788 }
2789
2790
/* Pipe eviction hook: intentionally a no-op in the current design */
#define grinder_evict(subport, pos)
2792
/*
 * Start pulling the newly installed pipe's state and its first queue
 * descriptor into cache before the next grinder pipeline stage
 * dereferences them.
 */
static inline void
grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;

	rte_prefetch0(grinder->pipe);
	rte_prefetch0(grinder->queue[0]);
}
2801
/*
 * Prefetch the head slot of each queue of the current TC.
 * For the best-effort TC the WRR load/step is deliberately interleaved
 * between the prefetch pairs so the computation hides the memory
 * latency of the later prefetches.
 */
static inline void
grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];

	qsize = grinder->qsize;
	grinder->qpos = 0;

	/* Strict-priority TC: single queue, single prefetch.
	 * qsize is a power of two, so "& (qsize - 1)" wraps the ring index.
	 */
	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
		qr[0] = grinder->queue[0]->qr & (qsize - 1);

		rte_prefetch0(grinder->qbase[0] + qr[0]);
		return;
	}

	qr[0] = grinder->queue[0]->qr & (qsize - 1);
	qr[1] = grinder->queue[1]->qr & (qsize - 1);
	qr[2] = grinder->queue[2]->qr & (qsize - 1);
	qr[3] = grinder->queue[3]->qr & (qsize - 1);

	rte_prefetch0(grinder->qbase[0] + qr[0]);
	rte_prefetch0(grinder->qbase[1] + qr[1]);

	/* Overlap WRR state restore and first arbitration step with the
	 * in-flight prefetches above
	 */
	grinder_wrr_load(subport, pos);
	grinder_wrr(subport, pos);

	rte_prefetch0(grinder->qbase[2] + qr[2]);
	rte_prefetch0(grinder->qbase[3] + qr[3]);
}
2832
2833 static inline void
2834 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2835 {
2836         struct rte_sched_grinder *grinder = subport->grinder + pos;
2837         uint32_t qpos = grinder->qpos;
2838         struct rte_mbuf **qbase = grinder->qbase[qpos];
2839         uint16_t qsize = grinder->qsize;
2840         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2841
2842         grinder->pkt = qbase[qr];
2843         rte_prefetch0(grinder->pkt);
2844
2845         if (unlikely((qr & 0x7) == 7)) {
2846                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2847
2848                 rte_prefetch0(qbase + qr_next);
2849         }
2850 }
2851
/*
 * Run one step of the grinder state machine at position pos and return
 * the number of packets sent in this step (0 or 1).
 * The work is pipelined across four states (pipe prefetch -> TC queue
 * array prefetch -> mbuf prefetch -> schedule) so that data prefetched
 * in one state is cache-resident by the time the next state touches it.
 */
static inline uint32_t
grinder_handle(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;

	switch (grinder->state) {
	case e_GRINDER_PREFETCH_PIPE:
	{
		/* Acquire the next active pipe; stay idle when none found */
		if (grinder_next_pipe(port, subport, pos)) {
			grinder_prefetch_pipe(subport, pos);
			subport->busy_grinders++;

			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
			return 0;
		}

		return 0;
	}

	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
	{
		struct rte_sched_pipe *pipe = grinder->pipe;

		/* Pipe struct is now cache-resident: resolve its profiles */
		grinder->pipe_params = subport->pipe_profiles + pipe->profile;
		grinder->subport_params = port->subport_profiles +
						subport->profile;

		grinder_prefetch_tc_queue_arrays(subport, pos);

		if (subport->tc_ov_enabled)
			grinder_credits_update_with_tc_ov(port, subport, pos);
		else
			grinder_credits_update(port, subport, pos);

		grinder->state = e_GRINDER_PREFETCH_MBUF;
		return 0;
	}

	case e_GRINDER_PREFETCH_MBUF:
	{
		grinder_prefetch_mbuf(subport, pos);

		grinder->state = e_GRINDER_READ_MBUF;
		return 0;
	}

	case e_GRINDER_READ_MBUF:
	{
		uint32_t wrr_active, result = 0;

		result = grinder_schedule(port, subport, pos);

		/* WRR arbitration applies only to the best-effort TC */
		wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);

		/* Look for next packet within the same TC */
		if (result && grinder->qmask) {
			if (wrr_active)
				grinder_wrr(subport, pos);

			grinder_prefetch_mbuf(subport, pos);

			return 1;
		}

		/* TC exhausted (or blocked on credits): save WRR state */
		if (wrr_active)
			grinder_wrr_store(subport, pos);

		/* Look for another active TC within same pipe */
		if (grinder_next_tc(port, subport, pos)) {
			grinder_prefetch_tc_queue_arrays(subport, pos);

			grinder->state = e_GRINDER_PREFETCH_MBUF;
			return result;
		}

		/* Mark the first unproductive pipe to detect a full loop
		 * over all active pipes with no packet sent
		 */
		if (grinder->productive == 0 &&
		    subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
			subport->pipe_loop = grinder->pindex;

		grinder_evict(subport, pos);

		/* Look for another active pipe */
		if (grinder_next_pipe(port, subport, pos)) {
			grinder_prefetch_pipe(subport, pos);

			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
			return result;
		}

		/* No active pipe found */
		subport->busy_grinders--;

		grinder->state = e_GRINDER_PREFETCH_PIPE;
		return result;
	}

	default:
		rte_panic("Algorithmic error (invalid state)\n");
		return 0;
	}
}
2954
2955 static inline void
2956 rte_sched_port_time_resync(struct rte_sched_port *port)
2957 {
2958         uint64_t cycles = rte_get_tsc_cycles();
2959         uint64_t cycles_diff;
2960         uint64_t bytes_diff;
2961         uint32_t i;
2962
2963         if (cycles < port->time_cpu_cycles)
2964                 port->time_cpu_cycles = 0;
2965
2966         cycles_diff = cycles - port->time_cpu_cycles;
2967         /* Compute elapsed time in bytes */
2968         bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2969                                            port->inv_cycles_per_byte);
2970
2971         /* Advance port time */
2972         port->time_cpu_cycles +=
2973                 (bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT;
2974         port->time_cpu_bytes += bytes_diff;
2975         if (port->time < port->time_cpu_bytes)
2976                 port->time = port->time_cpu_bytes;
2977
2978         /* Reset pipe loop detection */
2979         for (i = 0; i < port->n_subports_per_port; i++)
2980                 port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2981 }
2982
2983 static inline int
2984 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2985 {
2986         int exceptions;
2987
2988         /* Check if any exception flag is set */
2989         exceptions = (second_pass && subport->busy_grinders == 0) ||
2990                 (subport->pipe_exhaustion == 1);
2991
2992         /* Clear exception flags */
2993         subport->pipe_exhaustion = 0;
2994
2995         return exceptions;
2996 }
2997
/*
 * Dequeue up to n_pkts packets from the port into pkts.
 * Resyncs port time with the TSC, then repeatedly steps the grinders in
 * round-robin order, rotating to the next subport whenever the current
 * one raises an exception (pipe exhaustion or no busy grinders on a
 * second pass). Returns the number of packets actually written to pkts.
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
{
	struct rte_sched_subport *subport;
	uint32_t subport_id = port->subport_id;
	uint32_t i, n_subports = 0, count;

	port->pkts_out = pkts;
	port->n_pkts_out = 0;

	rte_sched_port_time_resync(port);

	/* Take each queue in the grinder one step further */
	for (i = 0, count = 0; ; i++)  {
		subport = port->subports[subport_id];

		/* i wraps over the grinder set; each call advances one
		 * grinder by one pipeline stage
		 */
		count += grinder_handle(port, subport,
				i & (RTE_SCHED_PORT_N_GRINDERS - 1));

		/* Burst complete: remember the next subport to resume at */
		if (count == n_pkts) {
			subport_id++;

			if (subport_id == port->n_subports_per_port)
				subport_id = 0;

			port->subport_id = subport_id;
			break;
		}

		/* Subport out of work: rotate to the next one and restart
		 * the grinder sweep (i >= N_GRINDERS marks a second pass)
		 */
		if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
			i = 0;
			subport_id++;
			n_subports++;
		}

		if (subport_id == port->n_subports_per_port)
			subport_id = 0;

		/* All subports visited without filling the burst: stop */
		if (n_subports == port->n_subports_per_port) {
			port->subport_id = subport_id;
			break;
		}
	}

	return count;
}