/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_bitmap.h>
#include <rte_reciprocal.h>

#include "rte_sched.h"
#include "rte_sched_common.h"
#include "rte_approx.h"

#ifdef __INTEL_COMPILER
#pragma warning(disable:2259) /* conversion may lose significant bits */
#endif

#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>

#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
#elif defined(__ARM_NEON)
#define SCHED_VECTOR_NEON
#endif

#endif

#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
#define RTE_SCHED_WRR_SHIFT                   3
#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

/* Scaling for cycles_per_byte calculation
 * Chosen so that minimum rate is 480 bit/sec
 */
#define RTE_SCHED_TIME_SHIFT                  8

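/*
 * Worked example (illustrative, hypothetical clock and rate): on a CPU
 * with a 2.5 GHz TSC scheduling a 10 GbE port (rate = 1.25e9 bytes/sec),
 * rte_sched_port_config() below computes
 *
 *     cycles_per_byte = (2500000000 << RTE_SCHED_TIME_SHIFT) / 1250000000
 *                     = 512
 *
 * i.e. 2 cycles per byte kept in 24.8 fixed point, so rates many orders
 * of magnitude below one byte per cycle remain representable.
 */
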
struct rte_sched_pipe_profile {
	/* Token bucket (TB) */
	uint64_t tb_period;
	uint64_t tb_credits_per_period;
	uint64_t tb_size;

	/* Pipe traffic classes */
	uint64_t tc_period;
	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint8_t tc_ov_weight;

	/* Pipe best-effort traffic class queues */
	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_pipe {
	/* Token bucket (TB) */
	uint64_t tb_time; /* time of last update */
	uint64_t tb_credits;

	/* Pipe profile and flags */
	uint32_t profile;

	/* Traffic classes (TCs) */
	uint64_t tc_time; /* time of next update */
	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/* Weighted Round Robin (WRR) */
	uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

	/* TC oversubscription */
	uint64_t tc_ov_credits;
	uint8_t tc_ov_period_id;
} __rte_cache_aligned;

struct rte_sched_queue {
	uint16_t qw; /* queue write pointer */
	uint16_t qr; /* queue read pointer */
};

struct rte_sched_queue_extra {
	struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_CMAN
	RTE_STD_C11
	union {
		struct rte_red red;
		struct rte_pie pie;
	};
#endif
};

enum grinder_state {
	e_GRINDER_PREFETCH_PIPE = 0,
	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
	e_GRINDER_PREFETCH_MBUF,
	e_GRINDER_READ_MBUF
};

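/*
 * A grinder drains one pipe at a time, stepping through the states above
 * in order: prefetch the pipe data, prefetch the queue arrays of the
 * active traffic class, prefetch the head mbuf, then read the mbuf and
 * schedule it.
 */
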
struct rte_sched_subport_profile {
	/* Token bucket (TB) */
	uint64_t tb_period;
	uint64_t tb_credits_per_period;
	uint64_t tb_size;

	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint64_t tc_period;
};

struct rte_sched_grinder {
	/* Pipe cache */
	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
	uint32_t pcache_w;
	uint32_t pcache_r;

	/* Current pipe */
	enum grinder_state state;
	uint32_t productive;
	uint32_t pindex;
	struct rte_sched_subport *subport;
	struct rte_sched_subport_profile *subport_params;
	struct rte_sched_pipe *pipe;
	struct rte_sched_pipe_profile *pipe_params;

	/* TC cache */
	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint32_t tccache_w;
	uint32_t tccache_r;

	/* Current TC */
	uint32_t tc_index;
	struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
	uint16_t qsize;
	uint32_t qmask;
	uint32_t qpos;
	struct rte_mbuf *pkt;

	/* WRR */
	uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_subport {
	/* Token bucket (TB) */
	uint64_t tb_time; /* time of last update */
	uint64_t tb_credits;

	/* Traffic classes (TCs) */
	uint64_t tc_time; /* time of next update */
	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/* TC oversubscription */
	uint64_t tc_ov_wm;
	uint64_t tc_ov_wm_min;
	uint64_t tc_ov_wm_max;
	uint8_t tc_ov_period_id;
	uint8_t tc_ov;
	uint32_t tc_ov_n;
	double tc_ov_rate;

	/* Statistics */
	struct rte_sched_subport_stats stats __rte_cache_aligned;

	/* Subport profile */
	uint32_t profile;
	/* Subport pipes */
	uint32_t n_pipes_per_subport_enabled;
	uint32_t n_pipe_profiles;
	uint32_t n_max_pipe_profiles;

	/* Pipe best-effort TC rate */
	uint64_t pipe_tc_be_rate_max;

	/* Pipe queues size */
	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_CMAN
	bool cman_enabled;
	enum rte_sched_cman_mode cman;

	RTE_STD_C11
	union {
		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	};
#endif

	/* Scheduling loop detection */
	uint32_t pipe_loop;
	uint32_t pipe_exhaustion;

	/* Bitmap */
	struct rte_bitmap *bmp;
	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

	/* Grinders */
	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
	uint32_t busy_grinders;

	/* Queue base calculation */
	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
	uint32_t qsize_sum;

	struct rte_sched_pipe *pipe;
	struct rte_sched_queue *queue;
	struct rte_sched_queue_extra *queue_extra;
	struct rte_sched_pipe_profile *pipe_profiles;
	uint8_t *bmp_array;
	struct rte_mbuf **queue_array;
	uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

struct rte_sched_port {
	/* User parameters */
	uint32_t n_subports_per_port;
	uint32_t n_pipes_per_subport;
	uint32_t n_pipes_per_subport_log2;
	uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
	uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
	uint32_t n_subport_profiles;
	uint32_t n_max_subport_profiles;
	uint64_t rate;
	uint32_t mtu;
	uint32_t frame_overhead;
	int socket;

	/* Timing */
	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
	uint64_t time;                /* Current NIC TX time measured in bytes */
	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
	uint64_t cycles_per_byte;

	/* Grinders */
	struct rte_mbuf **pkts_out;
	uint32_t n_pkts_out;
	uint32_t subport_id;

	/* Large data structures */
	struct rte_sched_subport_profile *subport_profiles;
	struct rte_sched_subport *subports[0] __rte_cache_aligned;
} __rte_cache_aligned;

enum rte_sched_subport_array {
	e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
	e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
	e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
	e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};

static inline uint32_t
rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
{
	return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
}

static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
	uint32_t pindex = qindex >> 4;
	uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

	return (subport->queue_array + pindex *
		subport->qsize_sum + subport->qsize_add[qpos]);
}

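/*
 * Queue index decomposition (illustrative): with 16 queues per pipe
 * (12 strict-priority + 4 best-effort), qindex >> 4 selects the pipe and
 * the low 4 bits select the queue within it. E.g. qindex = 37 gives
 * pindex = 2 and qpos = 5, so the queue storage starts at
 * queue_array + 2 * qsize_sum + qsize_add[5].
 */
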
static inline uint16_t
rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t qindex)
{
	uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

	return subport->qsize[tc];
}

static inline uint32_t
rte_sched_port_queues_per_port(struct rte_sched_port *port)
{
	uint32_t n_queues = 0, i;

	for (i = 0; i < port->n_subports_per_port; i++)
		n_queues += rte_sched_subport_pipe_queues(port->subports[i]);

	return n_queues;
}

static inline uint16_t
rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
{
	uint16_t pipe_queue = port->pipe_queue[traffic_class];

	return pipe_queue;
}

static inline uint8_t
rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
{
	uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

	return pipe_tc;
}

static inline uint8_t
rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
{
	uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

	return tc_queue;
}

static int
pipe_profile_check(struct rte_sched_pipe_params *params,
	uint64_t rate, uint16_t *qsize)
{
	uint32_t i;

	/* Pipe parameters */
	if (params == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	/* TB rate: non-zero, not greater than port rate */
	if (params->tb_rate == 0 ||
		params->tb_rate > rate) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tb rate\n", __func__);
		return -EINVAL;
	}

	/* TB size: non-zero */
	if (params->tb_size == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tb size\n", __func__);
		return -EINVAL;
	}

	/* TC rate: non-zero if qsize non-zero, not greater than pipe TB rate */
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
			params->tc_rate[i] > params->tb_rate))) {
			RTE_LOG(ERR, SCHED,
				"%s: Incorrect value for qsize or tc_rate\n", __func__);
			return -EINVAL;
		}
	}

	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
		qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for be traffic class rate\n", __func__);
		return -EINVAL;
	}

	/* TC period: non-zero */
	if (params->tc_period == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tc period\n", __func__);
		return -EINVAL;
	}

	/* Best-effort TC oversubscription weight: non-zero */
	if (params->tc_ov_weight == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for tc ov weight\n", __func__);
		return -EINVAL;
	}

	/* Queue WRR weights: non-zero */
	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
		if (params->wrr_weights[i] == 0) {
			RTE_LOG(ERR, SCHED,
				"%s: Incorrect value for wrr weight\n", __func__);
			return -EINVAL;
		}
	}

	return 0;
}

static int
subport_profile_check(struct rte_sched_subport_profile_params *params,
	uint64_t rate)
{
	uint32_t i;

	/* Check user parameters */
	if (params == NULL) {
		RTE_LOG(ERR, SCHED, "%s: "
			"Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	if (params->tb_rate == 0 || params->tb_rate > rate) {
		RTE_LOG(ERR, SCHED, "%s: "
			"Incorrect value for tb rate\n", __func__);
		return -EINVAL;
	}

	if (params->tb_size == 0) {
		RTE_LOG(ERR, SCHED, "%s: "
			"Incorrect value for tb size\n", __func__);
		return -EINVAL;
	}

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		uint64_t tc_rate = params->tc_rate[i];

		if (tc_rate == 0 || (tc_rate > params->tb_rate)) {
			RTE_LOG(ERR, SCHED, "%s: "
				"Incorrect value for tc rate\n", __func__);
			return -EINVAL;
		}
	}

	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
		RTE_LOG(ERR, SCHED, "%s: "
			"Incorrect tc rate(best effort)\n", __func__);
		return -EINVAL;
	}

	if (params->tc_period == 0) {
		RTE_LOG(ERR, SCHED, "%s: "
			"Incorrect value for tc period\n", __func__);
		return -EINVAL;
	}

	return 0;
}

static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
	uint32_t i;

	if (params == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	/* socket */
	if (params->socket < 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for socket id\n", __func__);
		return -EINVAL;
	}

	/* rate */
	if (params->rate == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for rate\n", __func__);
		return -EINVAL;
	}

	/* mtu */
	if (params->mtu == 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for mtu\n", __func__);
		return -EINVAL;
	}

	/* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
	if (params->n_subports_per_port == 0 ||
	    params->n_subports_per_port > 1u << 16 ||
	    !rte_is_power_of_2(params->n_subports_per_port)) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for number of subports\n", __func__);
		return -EINVAL;
	}

	if (params->subport_profiles == NULL ||
		params->n_subport_profiles == 0 ||
		params->n_max_subport_profiles == 0 ||
		params->n_subport_profiles > params->n_max_subport_profiles) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for subport profiles\n", __func__);
		return -EINVAL;
	}

	for (i = 0; i < params->n_subport_profiles; i++) {
		struct rte_sched_subport_profile_params *p =
						params->subport_profiles + i;
		int status;

		status = subport_profile_check(p, params->rate);
		if (status != 0) {
			RTE_LOG(ERR, SCHED,
				"%s: subport profile check failed(%d)\n",
				__func__, status);
			return -EINVAL;
		}
	}

	/* n_pipes_per_subport: non-zero, power of 2 */
	if (params->n_pipes_per_subport == 0 ||
	    !rte_is_power_of_2(params->n_pipes_per_subport)) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for maximum pipes number\n", __func__);
		return -EINVAL;
	}

	return 0;
}

static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
	enum rte_sched_subport_array array)
{
	uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
	uint32_t n_subport_pipe_queues =
		RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

	uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
	uint32_t size_queue =
		n_subport_pipe_queues * sizeof(struct rte_sched_queue);
	uint32_t size_queue_extra =
		n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
	uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
		sizeof(struct rte_sched_pipe_profile);
	uint32_t size_bmp_array =
		rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
	uint32_t size_per_pipe_queue_array, size_queue_array;

	uint32_t base, i;

	size_per_pipe_queue_array = 0;
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
			size_per_pipe_queue_array +=
				params->qsize[i] * sizeof(struct rte_mbuf *);
		else
			size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
				params->qsize[i] * sizeof(struct rte_mbuf *);
	}
	size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

	base = 0;

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
		return base;
	base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

	return base;
}

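/*
 * Per-subport memory layout (derived from the offsets above), one
 * cache-line-rounded region per array:
 *
 *   pipes | queues | queue_extra | pipe_profiles | bmp_array | queue_array
 *
 * e_RTE_SCHED_SUBPORT_ARRAY_TOTAL therefore yields the full footprint of
 * a subport's variable-size tail.
 */
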
static void
rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
{
	uint32_t i;

	subport->qsize_add[0] = 0;

	/* Strict priority traffic class */
	for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];

	/* Best-effort traffic class */
	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];

	subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
}

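/*
 * Worked example (hypothetical sizes): with qsize[0..11] = 64 for the
 * strict-priority classes and qsize[12] = 128 for best-effort, qsize_add
 * becomes {0, 64, 128, ..., 768, 896, 1024, 1152} and qsize_sum = 1280:
 * 12 SP queues of 64 slots plus 4 BE queues of 128 slots per pipe.
 */
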
static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
	struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
		"	Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
		"	Traffic classes: period = %"PRIu64",\n"
		"	credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
		", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
		", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
		"	Best-effort traffic class oversubscription: weight = %hhu\n"
		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
		i,

		/* Token bucket */
		p->tb_period,
		p->tb_credits_per_period,
		p->tb_size,

		/* Traffic classes */
		p->tc_period,
		p->tc_credits_per_period[0],
		p->tc_credits_per_period[1],
		p->tc_credits_per_period[2],
		p->tc_credits_per_period[3],
		p->tc_credits_per_period[4],
		p->tc_credits_per_period[5],
		p->tc_credits_per_period[6],
		p->tc_credits_per_period[7],
		p->tc_credits_per_period[8],
		p->tc_credits_per_period[9],
		p->tc_credits_per_period[10],
		p->tc_credits_per_period[11],
		p->tc_credits_per_period[12],

		/* Best-effort traffic class oversubscription */
		p->tc_ov_weight,

		/* WRR */
		p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}

static void
rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i)
{
	struct rte_sched_subport_profile *p = port->subport_profiles + i;

	RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n"
	"Token bucket: period = %"PRIu64", credits per period = %"PRIu64", "
	"size = %"PRIu64"\n"
	"Traffic classes: period = %"PRIu64",\n"
	"credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
	", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
	", %"PRIu64", %"PRIu64", %"PRIu64"]\n",
	i,

	/* Token bucket */
	p->tb_period,
	p->tb_credits_per_period,
	p->tb_size,

	/* Traffic classes */
	p->tc_period,
	p->tc_credits_per_period[0],
	p->tc_credits_per_period[1],
	p->tc_credits_per_period[2],
	p->tc_credits_per_period[3],
	p->tc_credits_per_period[4],
	p->tc_credits_per_period[5],
	p->tc_credits_per_period[6],
	p->tc_credits_per_period[7],
	p->tc_credits_per_period[8],
	p->tc_credits_per_period[9],
	p->tc_credits_per_period[10],
	p->tc_credits_per_period[11],
	p->tc_credits_per_period[12]);
}

static inline uint64_t
rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
{
	uint64_t time = time_ms;

	time = (time * rate) / 1000;

	return time;
}

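/*
 * Example conversion (hypothetical numbers): a tc_period of 10 ms on a
 * port running at 1,250,000,000 bytes/sec becomes
 * (10 * 1250000000) / 1000 = 12,500,000 bytes of credit per period.
 */
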
static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
	struct rte_sched_pipe_params *src,
	struct rte_sched_pipe_profile *dst,
	uint64_t rate)
{
	uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
	uint32_t lcd1, lcd2, lcd;
	uint32_t i;

	/* Token Bucket */
	if (src->tb_rate == rate) {
		dst->tb_credits_per_period = 1;
		dst->tb_period = 1;
	} else {
		double tb_rate = (double) src->tb_rate
				/ (double) rate;
		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
			&dst->tb_period);
	}

	dst->tb_size = src->tb_size;

	/* Traffic Classes */
	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
						rate);

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		if (subport->qsize[i])
			dst->tc_credits_per_period[i] =
				rte_sched_time_ms_to_bytes(src->tc_period,
					src->tc_rate[i]);

	dst->tc_ov_weight = src->tc_ov_weight;

	/* WRR queues */
	wrr_cost[0] = src->wrr_weights[0];
	wrr_cost[1] = src->wrr_weights[1];
	wrr_cost[2] = src->wrr_weights[2];
	wrr_cost[3] = src->wrr_weights[3];

	lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
	lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
	lcd = rte_get_lcd(lcd1, lcd2);

	wrr_cost[0] = lcd / wrr_cost[0];
	wrr_cost[1] = lcd / wrr_cost[1];
	wrr_cost[2] = lcd / wrr_cost[2];
	wrr_cost[3] = lcd / wrr_cost[3];

	dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
	dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
	dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
	dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}

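/*
 * WRR cost example (hypothetical weights): wrr_weights = {1, 2, 4, 8}
 * gives lcd = 8 and wrr_cost = {8, 4, 2, 1}. Cost is charged to a queue
 * as it is served, so the queue with the highest weight pays the lowest
 * cost and receives the largest share of the best-effort bandwidth.
 */
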
static void
rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src,
	struct rte_sched_subport_profile *dst,
	uint64_t rate)
{
	uint32_t i;

	/* Token Bucket */
	if (src->tb_rate == rate) {
		dst->tb_credits_per_period = 1;
		dst->tb_period = 1;
	} else {
		double tb_rate = (double) src->tb_rate
				/ (double) rate;
		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
			&dst->tb_period);
	}

	dst->tb_size = src->tb_size;

	/* Traffic Classes */
	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		dst->tc_credits_per_period[i] =
			rte_sched_time_ms_to_bytes(src->tc_period,
				src->tc_rate[i]);
}

static void
rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
	struct rte_sched_subport_params *params, uint64_t rate)
{
	uint32_t i;

	for (i = 0; i < subport->n_pipe_profiles; i++) {
		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
		struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;

		rte_sched_pipe_profile_convert(subport, src, dst, rate);
		rte_sched_port_log_pipe_profile(subport, i);
	}

	subport->pipe_tc_be_rate_max = 0;
	for (i = 0; i < subport->n_pipe_profiles; i++) {
		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
		uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

		if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
			subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
	}
}

static void
rte_sched_port_config_subport_profile_table(struct rte_sched_port *port,
	struct rte_sched_port_params *params,
	uint64_t rate)
{
	uint32_t i;

	for (i = 0; i < port->n_subport_profiles; i++) {
		struct rte_sched_subport_profile_params *src
				= params->subport_profiles + i;
		struct rte_sched_subport_profile *dst
				= port->subport_profiles + i;

		rte_sched_subport_profile_convert(src, dst, rate);
		rte_sched_port_log_subport_profile(port, i);
	}
}

static int
rte_sched_subport_check_params(struct rte_sched_subport_params *params,
	uint32_t n_max_pipes_per_subport,
	uint64_t rate)
{
	uint32_t i;

	/* Check user parameters */
	if (params == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter params\n", __func__);
		return -EINVAL;
	}

	/* qsize: if non-zero, power of 2,
	 * no bigger than 32K (due to 16-bit read/write pointers)
	 */
	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		uint16_t qsize = params->qsize[i];

		if (qsize != 0 && !rte_is_power_of_2(qsize)) {
			RTE_LOG(ERR, SCHED,
				"%s: Incorrect value for qsize\n", __func__);
			return -EINVAL;
		}
	}

	if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
		RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__);
		return -EINVAL;
	}

	/* n_pipes_per_subport: non-zero, not exceeding the port maximum,
	 * power of 2
	 */
	if (params->n_pipes_per_subport_enabled == 0 ||
		params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
		!rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for pipes number\n", __func__);
		return -EINVAL;
	}

	/* pipe_profiles and n_pipe_profiles */
	if (params->pipe_profiles == NULL ||
		params->n_pipe_profiles == 0 ||
		params->n_max_pipe_profiles == 0 ||
		params->n_pipe_profiles > params->n_max_pipe_profiles) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for pipe profiles\n", __func__);
		return -EINVAL;
	}

	for (i = 0; i < params->n_pipe_profiles; i++) {
		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
		int status;

		status = pipe_profile_check(p, rate, &params->qsize[0]);
		if (status != 0) {
			RTE_LOG(ERR, SCHED,
				"%s: Pipe profile check failed(%d)\n", __func__, status);
			return -EINVAL;
		}
	}

	return 0;
}

uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
	struct rte_sched_subport_params **subport_params)
{
	uint32_t size0 = 0, size1 = 0, i;
	int status;

	status = rte_sched_port_check_params(port_params);
	if (status != 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Port scheduler port params check failed (%d)\n",
			__func__, status);

		return 0;
	}

	for (i = 0; i < port_params->n_subports_per_port; i++) {
		struct rte_sched_subport_params *sp = subport_params[i];

		status = rte_sched_subport_check_params(sp,
				port_params->n_pipes_per_subport,
				port_params->rate);
		if (status != 0) {
			RTE_LOG(ERR, SCHED,
				"%s: Port scheduler subport params check failed (%d)\n",
				__func__, status);

			return 0;
		}
	}

	size0 = sizeof(struct rte_sched_port);

	for (i = 0; i < port_params->n_subports_per_port; i++) {
		struct rte_sched_subport_params *sp = subport_params[i];

		size1 += rte_sched_subport_get_array_base(sp,
					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
	}

	return size0 + size1;
}

struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
	struct rte_sched_port *port = NULL;
	uint32_t size0, size1, size2;
	uint32_t cycles_per_byte;
	uint32_t i, j;
	int status;

	status = rte_sched_port_check_params(params);
	if (status != 0) {
		RTE_LOG(ERR, SCHED,
			"%s: Port scheduler params check failed (%d)\n",
			__func__, status);
		return NULL;
	}

	size0 = sizeof(struct rte_sched_port);
	size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);
	size2 = params->n_max_subport_profiles *
		sizeof(struct rte_sched_subport_profile);

	/* Allocate memory to store the data structures */
	port = rte_zmalloc_socket("qos_params", size0 + size1,
				 RTE_CACHE_LINE_SIZE, params->socket);
	if (port == NULL) {
		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

		return NULL;
	}

	/* Allocate memory to store the subport profile */
	port->subport_profiles = rte_zmalloc_socket("subport_profile", size2,
					RTE_CACHE_LINE_SIZE, params->socket);
	if (port->subport_profiles == NULL) {
		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
		rte_free(port);
		return NULL;
	}

	/* User parameters */
	port->n_subports_per_port = params->n_subports_per_port;
	port->n_subport_profiles = params->n_subport_profiles;
	port->n_max_subport_profiles = params->n_max_subport_profiles;
	port->n_pipes_per_subport = params->n_pipes_per_subport;
	port->n_pipes_per_subport_log2 =
			__builtin_ctz(params->n_pipes_per_subport);
	port->socket = params->socket;

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		port->pipe_queue[i] = i;

	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
		port->pipe_tc[i] = j;

		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
			j++;
	}

	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
		port->tc_queue[i] = j;

		if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
			j++;
	}
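	/*
	 * Resulting mapping tables (13 TCs, 16 queues per pipe):
	 *   pipe_queue[tc] = {0, 1, ..., 11, 12}         first queue of each TC
	 *   pipe_tc[q]     = {0, 1, ..., 11, 12, 12, 12, 12}  TC of each queue
	 *   tc_queue[q]    = {0, 0, ..., 0, 0, 1, 2, 3}  queue index within TC
	 */
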
	port->rate = params->rate;
	port->mtu = params->mtu + params->frame_overhead;
	port->frame_overhead = params->frame_overhead;

	/* Timing */
	port->time_cpu_cycles = rte_get_tsc_cycles();
	port->time_cpu_bytes = 0;
	port->time = 0;

	/* Subport profile table */
	rte_sched_port_config_subport_profile_table(port, params, port->rate);

	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
		/ params->rate;
	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
	port->cycles_per_byte = cycles_per_byte;

	/* Grinders */
	port->pkts_out = NULL;
	port->n_pkts_out = 0;
	port->subport_id = 0;

	return port;
}

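/*
 * Illustrative configuration sketch (not part of the library): shows the
 * expected call into rte_sched_port_config() with hypothetical numbers.
 * It only sets fields that rte_sched_port_check_params() and
 * subport_profile_check() above actually validate; a real application
 * would size these from its NIC and traffic plan.
 */
static struct rte_sched_subport_profile_params example_subport_profile = {
	.tb_rate = 1250000000,		/* bytes/sec, hypothetical 10 GbE */
	.tb_size = 1000000,
	.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000,
		1250000000, 1250000000, 1250000000, 1250000000, 1250000000,
		1250000000, 1250000000, 1250000000, 1250000000},
	.tc_period = 10,		/* ms */
};

static __rte_unused struct rte_sched_port *
example_port_config(void)
{
	struct rte_sched_port_params p = {
		.socket = 0,
		.rate = 1250000000,	/* bytes/sec */
		.mtu = 1522,
		.frame_overhead = 24,	/* hypothetical framing overhead */
		.n_subports_per_port = 1,
		.n_pipes_per_subport = 4096,
		.subport_profiles = &example_subport_profile,
		.n_subport_profiles = 1,
		.n_max_subport_profiles = 1,
	};

	return rte_sched_port_config(&p);
}
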
static inline void
rte_sched_subport_free(struct rte_sched_port *port,
	struct rte_sched_subport *subport)
{
	uint32_t n_subport_pipe_queues;
	uint32_t qindex;

	if (subport == NULL)
		return;

	n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);

	/* Free enqueued mbufs */
	for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
		struct rte_mbuf **mbufs =
			rte_sched_subport_pipe_qbase(subport, qindex);
		uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
		if (qsize != 0) {
			struct rte_sched_queue *queue = subport->queue + qindex;
			uint16_t qr = queue->qr & (qsize - 1);
			uint16_t qw = queue->qw & (qsize - 1);

			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
				rte_pktmbuf_free(mbufs[qr]);
		}
	}

	rte_free(subport);
}

void
rte_sched_port_free(struct rte_sched_port *port)
{
	uint32_t i;

	/* Check user parameters */
	if (port == NULL)
		return;

	for (i = 0; i < port->n_subports_per_port; i++)
		rte_sched_subport_free(port, port->subports[i]);

	rte_free(port->subport_profiles);
	rte_free(port);
}

static void
rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
{
	uint32_t i;

	for (i = 0; i < n_subports; i++) {
		struct rte_sched_subport *subport = port->subports[i];

		rte_sched_subport_free(port, subport);
	}

	rte_free(port->subport_profiles);
	rte_free(port);
}

#ifdef RTE_SCHED_CMAN
static int
rte_sched_red_config(struct rte_sched_port *port,
	struct rte_sched_subport *s,
	struct rte_sched_subport_params *params,
	uint32_t n_subports)
{
	uint32_t i;

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		uint32_t j;

		for (j = 0; j < RTE_COLORS; j++) {
			/* if min/max are both zero, then RED is disabled */
			if ((params->cman_params->red_params[i][j].min_th |
				 params->cman_params->red_params[i][j].max_th) == 0) {
				continue;
			}

			if (rte_red_config_init(&s->red_config[i][j],
				params->cman_params->red_params[i][j].wq_log2,
				params->cman_params->red_params[i][j].min_th,
				params->cman_params->red_params[i][j].max_th,
				params->cman_params->red_params[i][j].maxp_inv) != 0) {
				rte_sched_free_memory(port, n_subports);

				RTE_LOG(NOTICE, SCHED,
					"%s: RED configuration init fails\n", __func__);
				return -EINVAL;
			}
		}
	}
	s->cman = RTE_SCHED_CMAN_RED;
	return 0;
}

static int
rte_sched_pie_config(struct rte_sched_port *port,
	struct rte_sched_subport *s,
	struct rte_sched_subport_params *params,
	uint32_t n_subports)
{
	uint32_t i;

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
			RTE_LOG(NOTICE, SCHED,
				"%s: PIE tailq threshold incorrect\n", __func__);
			return -EINVAL;
		}

		if (rte_pie_config_init(&s->pie_config[i],
			params->cman_params->pie_params[i].qdelay_ref,
			params->cman_params->pie_params[i].dp_update_interval,
			params->cman_params->pie_params[i].max_burst,
			params->cman_params->pie_params[i].tailq_th) != 0) {
			rte_sched_free_memory(port, n_subports);

			RTE_LOG(NOTICE, SCHED,
				"%s: PIE configuration init fails\n", __func__);
			return -EINVAL;
		}
	}
	s->cman = RTE_SCHED_CMAN_PIE;
	return 0;
}

static int
rte_sched_cman_config(struct rte_sched_port *port,
	struct rte_sched_subport *s,
	struct rte_sched_subport_params *params,
	uint32_t n_subports)
{
	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
		return rte_sched_red_config(port, s, params, n_subports);

	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
		return rte_sched_pie_config(port, s, params, n_subports);

	return -EINVAL;
}
#endif

int
rte_sched_subport_config(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_params *params,
	uint32_t subport_profile_id)
{
	struct rte_sched_subport *s = NULL;
	uint32_t n_subports = subport_id;
	struct rte_sched_subport_profile *profile;
	uint32_t n_subport_pipe_queues, i;
	uint32_t size0, size1, bmp_mem_size;
	int status;
	int ret;

	/* Check user parameters */
	if (port == NULL) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for parameter port\n", __func__);
		return -EINVAL;
	}

	if (subport_id >= port->n_subports_per_port) {
		RTE_LOG(ERR, SCHED,
			"%s: Incorrect value for subport id\n", __func__);
		ret = -EINVAL;
		goto out;
	}

	if (subport_profile_id >= port->n_max_subport_profiles) {
		RTE_LOG(ERR, SCHED,
			"%s: Subport profile id exceeds the max limit\n",
			__func__);
		ret = -EINVAL;
		goto out;
	}

	/* Memory is allocated only on the first invocation of the API for a
	 * given subport. Subsequent invocations on the same subport only
	 * update the subport bandwidth parameters.
	 */
	if (port->subports[subport_id] == NULL) {

		status = rte_sched_subport_check_params(params,
			port->n_pipes_per_subport,
			port->rate);
		if (status != 0) {
			RTE_LOG(NOTICE, SCHED,
				"%s: Port scheduler params check failed (%d)\n",
				__func__, status);
			ret = -EINVAL;
			goto out;
		}

		/* Determine the amount of memory to allocate */
		size0 = sizeof(struct rte_sched_subport);
		size1 = rte_sched_subport_get_array_base(params,
					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);

		/* Allocate memory to store the data structures */
		s = rte_zmalloc_socket("subport_params", size0 + size1,
			RTE_CACHE_LINE_SIZE, port->socket);
		if (s == NULL) {
			RTE_LOG(ERR, SCHED,
				"%s: Memory allocation fails\n", __func__);
			ret = -ENOMEM;
			goto out;
		}

		n_subports++;

		/* A new subport always starts with subport profile 0 */
		subport_profile_id = 0;

		/* Port */
		port->subports[subport_id] = s;

		s->tb_time = port->time;

		/* compile time checks */
		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
			(RTE_SCHED_PORT_N_GRINDERS - 1));

		/* User parameters */
		s->n_pipes_per_subport_enabled =
				params->n_pipes_per_subport_enabled;
		memcpy(s->qsize, params->qsize, sizeof(params->qsize));
		s->n_pipe_profiles = params->n_pipe_profiles;
		s->n_max_pipe_profiles = params->n_max_pipe_profiles;

#ifdef RTE_SCHED_CMAN
		if (params->cman_params != NULL) {
			s->cman_enabled = true;
			status = rte_sched_cman_config(port, s, params, n_subports);
			if (status) {
				RTE_LOG(NOTICE, SCHED,
					"%s: CMAN configuration fails\n", __func__);
				return status;
			}
		} else {
			s->cman_enabled = false;
		}
#endif

		/* Scheduling loop detection */
		s->pipe_loop = RTE_SCHED_PIPE_INVALID;
		s->pipe_exhaustion = 0;

		/* Grinders */
		s->busy_grinders = 0;

		/* Queue base calculation */
		rte_sched_subport_config_qsize(s);

		/* Large data structures */
		s->pipe = (struct rte_sched_pipe *)
			(s->memory + rte_sched_subport_get_array_base(params,
			e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
		s->queue = (struct rte_sched_queue *)
			(s->memory + rte_sched_subport_get_array_base(params,
			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
		s->queue_extra = (struct rte_sched_queue_extra *)
			(s->memory + rte_sched_subport_get_array_base(params,
			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
		s->pipe_profiles = (struct rte_sched_pipe_profile *)
			(s->memory + rte_sched_subport_get_array_base(params,
			e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
		s->bmp_array = s->memory + rte_sched_subport_get_array_base(
				params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
		s->queue_array = (struct rte_mbuf **)
			(s->memory + rte_sched_subport_get_array_base(params,
			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));

		/* Pipe profile table */
		rte_sched_subport_config_pipe_profile_table(s, params,
							    port->rate);

		/* Bitmap */
		n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
		bmp_mem_size = rte_bitmap_get_memory_footprint(
						n_subport_pipe_queues);
		s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
					bmp_mem_size);
		if (s->bmp == NULL) {
			RTE_LOG(ERR, SCHED,
				"%s: Subport bitmap init error\n", __func__);
			ret = -EINVAL;
			goto out;
		}

		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;

#ifdef RTE_SCHED_SUBPORT_TC_OV
		/* TC oversubscription */
		s->tc_ov_wm_min = port->mtu;
		s->tc_ov_period_id = 0;
		s->tc_ov = 0;
		s->tc_ov_n = 0;
		s->tc_ov_rate = 0;
#endif
	}

	{
		/* Update subport parameters from the subport profile table */
		profile = port->subport_profiles + subport_profile_id;

		s = port->subports[subport_id];

		s->tb_credits = profile->tb_size / 2;

		s->tc_time = port->time + profile->tc_period;

		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
			if (s->qsize[i])
				s->tc_credits[i] =
					profile->tc_credits_per_period[i];
			else
				profile->tc_credits_per_period[i] = 0;

#ifdef RTE_SCHED_SUBPORT_TC_OV
		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
							s->pipe_tc_be_rate_max);
		s->tc_ov_wm = s->tc_ov_wm_max;
#endif
		s->profile = subport_profile_id;
	}

	rte_sched_port_log_subport_profile(port, subport_profile_id);

	return 0;

out:
	rte_sched_free_memory(port, n_subports);

	return ret;
}

1373 int
1374 rte_sched_pipe_config(struct rte_sched_port *port,
1375         uint32_t subport_id,
1376         uint32_t pipe_id,
1377         int32_t pipe_profile)
1378 {
1379         struct rte_sched_subport *s;
1380         struct rte_sched_subport_profile *sp;
1381         struct rte_sched_pipe *p;
1382         struct rte_sched_pipe_profile *params;
1383         uint32_t n_subports = subport_id + 1;
1384         uint32_t deactivate, profile, i;
1385         int ret;
1386
1387         /* Check user parameters */
1388         profile = (uint32_t) pipe_profile;
1389         deactivate = (pipe_profile < 0);
1390
1391         if (port == NULL) {
1392                 RTE_LOG(ERR, SCHED,
1393                         "%s: Incorrect value for parameter port\n", __func__);
1394                 return -EINVAL;
1395         }
1396
1397         if (subport_id >= port->n_subports_per_port) {
1398                 RTE_LOG(ERR, SCHED,
1399                         "%s: Incorrect value for parameter subport id\n", __func__);
1400                 ret = -EINVAL;
1401                 goto out;
1402         }
1403
1404         s = port->subports[subport_id];
1405         if (pipe_id >= s->n_pipes_per_subport_enabled) {
1406                 RTE_LOG(ERR, SCHED,
1407                         "%s: Incorrect value for parameter pipe id\n", __func__);
1408                 ret = -EINVAL;
1409                 goto out;
1410         }
1411
1412         if (!deactivate && profile >= s->n_pipe_profiles) {
1413                 RTE_LOG(ERR, SCHED,
1414                         "%s: Incorrect value for parameter pipe profile\n", __func__);
1415                 ret = -EINVAL;
1416                 goto out;
1417         }
1418
1419         sp = port->subport_profiles + s->profile;
1420         /* Handle the case when pipe already has a valid configuration */
1421         p = s->pipe + pipe_id;
1422         if (p->tb_time) {
1423                 params = s->pipe_profiles + p->profile;
1424
1425                 double subport_tc_be_rate =
1426                 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1427                         / (double) sp->tc_period;
1428                 double pipe_tc_be_rate =
1429                         (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1430                         / (double) params->tc_period;
1431                 uint32_t tc_be_ov = s->tc_ov;
1432
1433                 /* Unplug pipe from its subport */
1434                 s->tc_ov_n -= params->tc_ov_weight;
1435                 s->tc_ov_rate -= pipe_tc_be_rate;
1436                 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1437
1438                 if (s->tc_ov != tc_be_ov) {
1439                         RTE_LOG(DEBUG, SCHED,
1440                                 "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
1441                                 subport_id, subport_tc_be_rate, s->tc_ov_rate);
1442                 }
1443
1444                 /* Reset the pipe */
1445                 memset(p, 0, sizeof(struct rte_sched_pipe));
1446         }
1447
1448         if (deactivate)
1449                 return 0;
1450
1451         /* Apply the new pipe configuration */
1452         p->profile = profile;
1453         params = s->pipe_profiles + p->profile;
1454
1455         /* Token Bucket (TB) */
1456         p->tb_time = port->time;
1457         p->tb_credits = params->tb_size / 2;
1458
1459         /* Traffic Classes (TCs) */
1460         p->tc_time = port->time + params->tc_period;
1461
1462         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1463                 if (s->qsize[i])
1464                         p->tc_credits[i] = params->tc_credits_per_period[i];
1465
1466         {
1467                 /* Subport best effort tc oversubscription */
1468                 double subport_tc_be_rate =
1469                 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1470                         / (double) sp->tc_period;
1471                 double pipe_tc_be_rate =
1472                         (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1473                         / (double) params->tc_period;
1474                 uint32_t tc_be_ov = s->tc_ov;
1475
1476                 s->tc_ov_n += params->tc_ov_weight;
1477                 s->tc_ov_rate += pipe_tc_be_rate;
1478                 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1479
1480                 if (s->tc_ov != tc_be_ov) {
1481                         RTE_LOG(DEBUG, SCHED,
1482                                 "Subport %u Best-effort TC oversubscription is ON (%.4lf < %.4lf)\n",
1483                                 subport_id, subport_tc_be_rate, s->tc_ov_rate);
1484                 }
1485                 p->tc_ov_period_id = s->tc_ov_period_id;
1486                 p->tc_ov_credits = s->tc_ov_wm;
1487         }
1488
1489         return 0;
1490
1491 out:
1492         rte_sched_free_memory(port, n_subports);
1493
1494         return ret;
1495 }
1496
1497 int
1498 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
1499         uint32_t subport_id,
1500         struct rte_sched_pipe_params *params,
1501         uint32_t *pipe_profile_id)
1502 {
1503         struct rte_sched_subport *s;
1504         struct rte_sched_pipe_profile *pp;
1505         uint32_t i;
1506         int status;
1507
1508         /* Port */
1509         if (port == NULL) {
1510                 RTE_LOG(ERR, SCHED,
1511                         "%s: Incorrect value for parameter port\n", __func__);
1512                 return -EINVAL;
1513         }
1514
1515         /* Subport id must not exceed the max limit */
1516         if (subport_id >= port->n_subports_per_port) {
1517                 RTE_LOG(ERR, SCHED,
1518                         "%s: Incorrect value for subport id\n", __func__);
1519                 return -EINVAL;
1520         }
1521
1522         s = port->subports[subport_id];
1523
1524         /* Number of pipe profiles must not exceed the max limit */
1525         if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
1526                 RTE_LOG(ERR, SCHED,
1527                         "%s: Number of pipe profiles exceeds the max limit\n", __func__);
1528                 return -EINVAL;
1529         }
1530
1531         /* Pipe params */
1532         status = pipe_profile_check(params, port->rate, &s->qsize[0]);
1533         if (status != 0) {
1534                 RTE_LOG(ERR, SCHED,
1535                         "%s: Pipe profile check failed (%d)\n", __func__, status);
1536                 return -EINVAL;
1537         }
1538
1539         pp = &s->pipe_profiles[s->n_pipe_profiles];
1540         rte_sched_pipe_profile_convert(s, params, pp, port->rate);
1541
1542         /* Pipe profile must not already exist */
1543         for (i = 0; i < s->n_pipe_profiles; i++)
1544                 if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
1545                         RTE_LOG(ERR, SCHED,
1546                                 "%s: Pipe profile exists\n", __func__);
1547                         return -EINVAL;
1548                 }
1549
1550         /* Pipe profile commit */
1551         *pipe_profile_id = s->n_pipe_profiles;
1552         s->n_pipe_profiles++;
1553
1554         if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
1555                 s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
1556
1557         rte_sched_port_log_pipe_profile(s, *pipe_profile_id);
1558
1559         return 0;
1560 }
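
/*
 * Illustrative sketch (not part of the library): adding a pipe profile at
 * runtime and applying it to a pipe. All field values below are hypothetical;
 * see struct rte_sched_pipe_params for the complete field list.
 *
 *	struct rte_sched_pipe_params pp = {
 *		.tb_rate = 305175,
 *		.tb_size = 1000000,
 *		.tc_rate = {305175, 305175, 305175, 305175, 305175, 305175,
 *			305175, 305175, 305175, 305175, 305175, 305175, 305175},
 *		.tc_period = 40,
 *		.wrr_weights = {1, 1, 1, 1},
 *	};
 *	uint32_t pp_id;
 *
 *	if (rte_sched_subport_pipe_profile_add(port, subport_id, &pp, &pp_id) == 0)
 *		rte_sched_pipe_config(port, subport_id, pipe_id, (int32_t)pp_id);
 */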
1561
1562 int
1563 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
1564         struct rte_sched_subport_profile_params *params,
1565         uint32_t *subport_profile_id)
1566 {
1567         int status;
1568         uint32_t i;
1569         struct rte_sched_subport_profile *dst;
1570
1571         /* Port */
1572         if (port == NULL) {
1573                 RTE_LOG(ERR, SCHED,
1574                         "%s: Incorrect value for parameter port\n", __func__);
1575                 return -EINVAL;
1576         }
1577
1578         if (params == NULL) {
1579                 RTE_LOG(ERR, SCHED,
1580                         "%s: Incorrect value for parameter profile\n", __func__);
1581                 return -EINVAL;
1582         }
1583
1584         if (subport_profile_id == NULL) {
1585                 RTE_LOG(ERR, SCHED,
1586                         "%s: Incorrect value for parameter subport_profile_id\n",
1587                         __func__);
1588                 return -EINVAL;
1589         }
1590
1591         dst = port->subport_profiles + port->n_subport_profiles;
1592
1593         /* Number of subport profiles must not exceed the max limit */
1594         if (port->n_subport_profiles >= port->n_max_subport_profiles) {
1595                 RTE_LOG(ERR, SCHED,
1596                         "%s: Number of subport profiles exceeds the max limit\n",
1597                         __func__);
1598                 return -EINVAL;
1599         }
1600
1601         status = subport_profile_check(params, port->rate);
1602         if (status != 0) {
1603                 RTE_LOG(ERR, SCHED,
1604                         "%s: subport profile check failed (%d)\n", __func__, status);
1605                 return -EINVAL;
1606         }
1607
1608         rte_sched_subport_profile_convert(params, dst, port->rate);
1609
1610         /* Subport profile must not already exist */
1611         for (i = 0; i < port->n_subport_profiles; i++)
1612                 if (memcmp(port->subport_profiles + i,
1613                     dst, sizeof(*dst)) == 0) {
1614                         RTE_LOG(ERR, SCHED,
1615                                 "%s: subport profile exists\n", __func__);
1616                         return -EINVAL;
1617                 }
1618
1619         /* Subport profile commit */
1620         *subport_profile_id = port->n_subport_profiles;
1621         port->n_subport_profiles++;
1622
1623         rte_sched_port_log_subport_profile(port, *subport_profile_id);
1624
1625         return 0;
1626 }
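
/*
 * Illustrative sketch (not part of the library): subport profiles are
 * registered once against the port and later referenced by id when a subport
 * is configured via rte_sched_subport_config(). Values are hypothetical.
 *
 *	struct rte_sched_subport_profile_params spp = {
 *		.tb_rate = 1250000000,
 *		.tb_size = 1000000,
 *		.tc_rate = {1250000000, ...},	// one rate per traffic class
 *		.tc_period = 10,
 *	};
 *	uint32_t spp_id;
 *
 *	if (rte_sched_port_subport_profile_add(port, &spp, &spp_id) != 0)
 *		rte_exit(EXIT_FAILURE, "subport profile add failed\n");
 */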
1627
1628 static inline uint32_t
1629 rte_sched_port_qindex(struct rte_sched_port *port,
1630         uint32_t subport,
1631         uint32_t pipe,
1632         uint32_t traffic_class,
1633         uint32_t queue)
1634 {
1635         return ((subport & (port->n_subports_per_port - 1)) <<
1636                 (port->n_pipes_per_subport_log2 + 4)) |
1637                 ((pipe &
1638                 (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
1639                 ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
1640                 (RTE_SCHED_QUEUES_PER_PIPE - 1));
1641 }
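
/*
 * Worked example of the queue id layout built above, assuming
 * n_pipes_per_subport_log2 = 12 (4096 pipes per subport):
 *
 *	queue_id = (subport << 16) | (pipe << 4) | pipe_queue
 *
 * so subport 2, pipe 100, best-effort queue 2 (pipe queue 12 + 2 = 14)
 * maps to (2 << 16) | (100 << 4) | 14 = 131072 + 1600 + 14 = 132686.
 * The low 4 bits cover the RTE_SCHED_QUEUES_PER_PIPE = 16 pipe queues.
 */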
1642
1643 void
1644 rte_sched_port_pkt_write(struct rte_sched_port *port,
1645                          struct rte_mbuf *pkt,
1646                          uint32_t subport, uint32_t pipe,
1647                          uint32_t traffic_class,
1648                          uint32_t queue, enum rte_color color)
1649 {
1650         uint32_t queue_id =
1651                 rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
1652
1653         rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
1654 }
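
/*
 * Illustrative sketch (not part of the library): a classification stage
 * stamps the scheduling hierarchy into the mbuf before enqueue. The
 * classify() helper below is hypothetical, application-defined code.
 *
 *	uint32_t subport, pipe, tc, queue;
 *
 *	classify(pkt, &subport, &pipe, &tc, &queue);
 *	rte_sched_port_pkt_write(port, pkt, subport, pipe, tc, queue,
 *		RTE_COLOR_GREEN);
 */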
1655
1656 void
1657 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1658                                   const struct rte_mbuf *pkt,
1659                                   uint32_t *subport, uint32_t *pipe,
1660                                   uint32_t *traffic_class, uint32_t *queue)
1661 {
1662         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1663
1664         *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1665         *pipe = (queue_id >> 4) &
1666                 (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1667         *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1668         *queue = rte_sched_port_tc_queue(port, queue_id);
1669 }
1670
1671 enum rte_color
1672 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1673 {
1674         return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1675 }
1676
1677 int
1678 rte_sched_subport_read_stats(struct rte_sched_port *port,
1679                              uint32_t subport_id,
1680                              struct rte_sched_subport_stats *stats,
1681                              uint32_t *tc_ov)
1682 {
1683         struct rte_sched_subport *s;
1684
1685         /* Check user parameters */
1686         if (port == NULL) {
1687                 RTE_LOG(ERR, SCHED,
1688                         "%s: Incorrect value for parameter port\n", __func__);
1689                 return -EINVAL;
1690         }
1691
1692         if (subport_id >= port->n_subports_per_port) {
1693                 RTE_LOG(ERR, SCHED,
1694                         "%s: Incorrect value for subport id\n", __func__);
1695                 return -EINVAL;
1696         }
1697
1698         if (stats == NULL) {
1699                 RTE_LOG(ERR, SCHED,
1700                         "%s: Incorrect value for parameter stats\n", __func__);
1701                 return -EINVAL;
1702         }
1703
1704         if (tc_ov == NULL) {
1705                 RTE_LOG(ERR, SCHED,
1706                         "%s: Incorrect value for tc_ov\n", __func__);
1707                 return -EINVAL;
1708         }
1709
1710         s = port->subports[subport_id];
1711
1712         /* Copy subport stats and clear */
1713         memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1714         memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1715
1716         /* Subport TC oversubscription status */
1717         *tc_ov = s->tc_ov;
1718
1719         return 0;
1720 }
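
/*
 * Note the read-and-clear semantics above: the subport counters restart
 * from zero on every call, so a monitor accumulates deltas. A minimal
 * sketch (illustrative only):
 *
 *	struct rte_sched_subport_stats st;
 *	uint32_t tc_ov;
 *
 *	if (rte_sched_subport_read_stats(port, subport_id, &st, &tc_ov) == 0)
 *		total_be_pkts += st.n_pkts_tc[RTE_SCHED_TRAFFIC_CLASS_BE];
 */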
1721
1722 int
1723 rte_sched_queue_read_stats(struct rte_sched_port *port,
1724         uint32_t queue_id,
1725         struct rte_sched_queue_stats *stats,
1726         uint16_t *qlen)
1727 {
1728         struct rte_sched_subport *s;
1729         struct rte_sched_queue *q;
1730         struct rte_sched_queue_extra *qe;
1731         uint32_t subport_id, subport_qshift, subport_qindex;
1732
1733         /* Check user parameters */
1734         if (port == NULL) {
1735                 RTE_LOG(ERR, SCHED,
1736                         "%s: Incorrect value for parameter port\n", __func__);
1737                 return -EINVAL;
1738         }
1739
1740         if (queue_id >= rte_sched_port_queues_per_port(port)) {
1741                 RTE_LOG(ERR, SCHED,
1742                         "%s: Incorrect value for queue id\n", __func__);
1743                 return -EINVAL;
1744         }
1745
1746         if (stats == NULL) {
1747                 RTE_LOG(ERR, SCHED,
1748                         "%s: Incorrect value for parameter stats\n", __func__);
1749                 return -EINVAL;
1750         }
1751
1752         if (qlen == NULL) {
1753                 RTE_LOG(ERR, SCHED,
1754                         "%s: Incorrect value for parameter qlen\n", __func__);
1755                 return -EINVAL;
1756         }
1757         subport_qshift = port->n_pipes_per_subport_log2 + 4;
1758         subport_id = (queue_id >> subport_qshift) & (port->n_subports_per_port - 1);
1759
1760         s = port->subports[subport_id];
1761         subport_qindex = ((1 << subport_qshift) - 1) & queue_id;
1762         q = s->queue + subport_qindex;
1763         qe = s->queue_extra + subport_qindex;
1764
1765         /* Copy queue stats and clear */
1766         memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1767         memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1768
1769         /* Queue length */
1770         *qlen = q->qw - q->qr;
1771
1772         return 0;
1773 }
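
/*
 * The queue length computed above relies on 16-bit unsigned wrap-around of
 * the write/read counters, so it remains correct across counter overflow:
 * e.g. qw = 3 (after wrapping) and qr = 65534 gives
 * (uint16_t)(3 - 65534) = 5 packets in the queue.
 */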
1774
1775 #ifdef RTE_SCHED_DEBUG
1776
1777 static inline int
1778 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1779         uint32_t qindex)
1780 {
1781         struct rte_sched_queue *queue = subport->queue + qindex;
1782
1783         return queue->qr == queue->qw;
1784 }
1785
1786 #endif /* RTE_SCHED_DEBUG */
1787
1788 #ifdef RTE_SCHED_COLLECT_STATS
1789
1790 static inline void
1791 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1792         struct rte_sched_subport *subport,
1793         uint32_t qindex,
1794         struct rte_mbuf *pkt)
1795 {
1796         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1797         uint32_t pkt_len = pkt->pkt_len;
1798
1799         subport->stats.n_pkts_tc[tc_index] += 1;
1800         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1801 }
1802
1803 static inline void
1804 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1805         struct rte_sched_subport *subport,
1806         uint32_t qindex,
1807         struct rte_mbuf *pkt,
1808         __rte_unused uint32_t n_pkts_cman_dropped)
1809 {
1810         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1811         uint32_t pkt_len = pkt->pkt_len;
1812
1813         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1814         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1815         subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
1816 }
1817
1818 static inline void
1819 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1820         uint32_t qindex,
1821         struct rte_mbuf *pkt)
1822 {
1823         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1824         uint32_t pkt_len = pkt->pkt_len;
1825
1826         qe->stats.n_pkts += 1;
1827         qe->stats.n_bytes += pkt_len;
1828 }
1829
1830 static inline void
1831 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1832         uint32_t qindex,
1833         struct rte_mbuf *pkt,
1834         __rte_unused uint32_t n_pkts_cman_dropped)
1835 {
1836         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1837         uint32_t pkt_len = pkt->pkt_len;
1838
1839         qe->stats.n_pkts_dropped += 1;
1840         qe->stats.n_bytes_dropped += pkt_len;
1841 #ifdef RTE_SCHED_CMAN
1842         if (subport->cman_enabled)
1843                 qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
1844 #endif
1845 }
1846
1847 #endif /* RTE_SCHED_COLLECT_STATS */
1848
1849 #ifdef RTE_SCHED_CMAN
1850
1851 static inline int
1852 rte_sched_port_cman_drop(struct rte_sched_port *port,
1853         struct rte_sched_subport *subport,
1854         struct rte_mbuf *pkt,
1855         uint32_t qindex,
1856         uint16_t qlen)
1857 {
1858         if (!subport->cman_enabled)
1859                 return 0;
1860
1861         struct rte_sched_queue_extra *qe;
1862         uint32_t tc_index;
1863
1864         tc_index = rte_sched_port_pipe_tc(port, qindex);
1865         qe = subport->queue_extra + qindex;
1866
1867         /* RED */
1868         if (subport->cman == RTE_SCHED_CMAN_RED) {
1869                 struct rte_red_config *red_cfg;
1870                 struct rte_red *red;
1871                 enum rte_color color;
1872
1873                 color = rte_sched_port_pkt_read_color(pkt);
1874                 red_cfg = &subport->red_config[tc_index][color];
1875
1876                 if ((red_cfg->min_th | red_cfg->max_th) == 0)
1877                         return 0;
1878
1879                 red = &qe->red;
1880
1881                 return rte_red_enqueue(red_cfg, red, qlen, port->time);
1882         }
1883
1884         /* PIE */
1885         struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
1886         struct rte_pie *pie = &qe->pie;
1887
1888         return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
1889 }
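
/*
 * Both congestion-management branches above return non-zero when the packet
 * should be dropped ahead of the tail-drop check: RED decides from the
 * running average of the queue length, while PIE additionally feeds the
 * packet size and the TSC timestamp into its latency-based drop probability
 * model.
 */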
1890
1891 static inline void
1892 rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
1893         struct rte_sched_subport *subport, uint32_t qindex)
1894 {
1895         if (subport->cman_enabled) {
1896                 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1897                 if (subport->cman == RTE_SCHED_CMAN_RED) {
1898                         struct rte_red *red = &qe->red;
1899
1900                         rte_red_mark_queue_empty(red, port->time);
1901                 }
1902         }
1903 }
1904
1905 static inline void
1906 rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
1907         uint32_t qindex, uint32_t pkt_len, uint64_t time) {
1908         if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
1909                 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1910                 struct rte_pie *pie = &qe->pie;
1911
1912                 /* Update queue length */
1913                 pie->qlen -= 1;
1914                 pie->qlen_bytes -= pkt_len;
1915
1916                 rte_pie_dequeue(pie, pkt_len, time);
1917         }
1918 }
1919
1920 #else
1921
1922 static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
1923         struct rte_sched_subport *subport __rte_unused,
1924         struct rte_mbuf *pkt __rte_unused,
1925         uint32_t qindex __rte_unused,
1926         uint16_t qlen __rte_unused)
1927 {
1928         return 0;
1929 }
1930
1931 #define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
1932
1933 static inline void
1934 rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
1935         uint32_t qindex __rte_unused,
1936         uint32_t pkt_len __rte_unused,
1937         uint64_t time __rte_unused) {
1938         /* do-nothing when RTE_SCHED_CMAN not defined */
1939 }
1940
1941 #endif /* RTE_SCHED_CMAN */
1942
1943 #ifdef RTE_SCHED_DEBUG
1944
1945 static inline void
1946 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1947                        uint64_t bmp_slab)
1948 {
1949         uint64_t mask;
1950         uint32_t i, panic;
1951
1952         if (bmp_slab == 0)
1953                 rte_panic("Empty slab at position %u\n", bmp_pos);
1954
1955         panic = 0;
1956         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1957                 if (mask & bmp_slab) {
1958                         if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1959                                 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1960                                 panic = 1;
1961                         }
1962                 }
1963         }
1964
1965         if (panic)
1966                 rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1967                         bmp_slab, bmp_pos);
1968 }
1969
1970 #endif /* RTE_SCHED_DEBUG */
1971
1972 static inline struct rte_sched_subport *
1973 rte_sched_port_subport(struct rte_sched_port *port,
1974         struct rte_mbuf *pkt)
1975 {
1976         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1977         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1978
1979         return port->subports[subport_id];
1980 }
1981
1982 static inline uint32_t
1983 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1984         struct rte_mbuf *pkt, uint32_t subport_qmask)
1985 {
1986         struct rte_sched_queue *q;
1987 #ifdef RTE_SCHED_COLLECT_STATS
1988         struct rte_sched_queue_extra *qe;
1989 #endif
1990         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1991         uint32_t subport_queue_id = subport_qmask & qindex;
1992
1993         q = subport->queue + subport_queue_id;
1994         rte_prefetch0(q);
1995 #ifdef RTE_SCHED_COLLECT_STATS
1996         qe = subport->queue_extra + subport_queue_id;
1997         rte_prefetch0(qe);
1998 #endif
1999
2000         return subport_queue_id;
2001 }
2002
2003 static inline void
2004 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
2005         struct rte_sched_subport *subport,
2006         uint32_t qindex,
2007         struct rte_mbuf **qbase)
2008 {
2009         struct rte_sched_queue *q;
2010         struct rte_mbuf **q_qw;
2011         uint16_t qsize;
2012
2013         q = subport->queue + qindex;
2014         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2015         q_qw = qbase + (q->qw & (qsize - 1));
2016
2017         rte_prefetch0(q_qw);
2018         rte_bitmap_prefetch0(subport->bmp, qindex);
2019 }
2020
2021 static inline int
2022 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
2023         struct rte_sched_subport *subport,
2024         uint32_t qindex,
2025         struct rte_mbuf **qbase,
2026         struct rte_mbuf *pkt)
2027 {
2028         struct rte_sched_queue *q;
2029         uint16_t qsize;
2030         uint16_t qlen;
2031
2032         q = subport->queue + qindex;
2033         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2034         qlen = q->qw - q->qr;
2035
2036         /* Drop the packet (and update drop stats) when queue is full */
2037         if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
2038                      (qlen >= qsize))) {
2039                 rte_pktmbuf_free(pkt);
2040 #ifdef RTE_SCHED_COLLECT_STATS
2041                 rte_sched_port_update_subport_stats_on_drop(port, subport,
2042                         qindex, pkt, qlen < qsize);
2043                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
2044                         qlen < qsize);
2045 #endif
2046                 return 0;
2047         }
2048
2049         /* Enqueue packet */
2050         qbase[q->qw & (qsize - 1)] = pkt;
2051         q->qw++;
2052
2053         /* Activate queue in the subport bitmap */
2054         rte_bitmap_set(subport->bmp, qindex);
2055
2056         /* Statistics */
2057 #ifdef RTE_SCHED_COLLECT_STATS
2058         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
2059         rte_sched_port_update_queue_stats(subport, qindex, pkt);
2060 #endif
2061
2062         return 1;
2063 }
2064
2065
2066 /*
2067  * The enqueue function implements a 4-level pipeline with each stage
2068  * processing two different packets. The purpose of using a pipeline
2069  * is to hide the latency of prefetching the data structures. The
2070  * naming convention is presented in the diagram below:
2071  *
2072  *   p00  _______   p10  _______   p20  _______   p30  _______
2073  * ----->|       |----->|       |----->|       |----->|       |----->
2074  *       |   0   |      |   1   |      |   2   |      |   3   |
2075  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
2076  *   p01            p11            p21            p31
2077  *
2078  */
2079 int
2080 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
2081                        uint32_t n_pkts)
2082 {
2083         struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
2084                 *pkt30, *pkt31, *pkt_last;
2085         struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
2086                 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
2087         struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
2088                 *subport20, *subport21, *subport30, *subport31, *subport_last;
2089         uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
2090         uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
2091         uint32_t subport_qmask;
2092         uint32_t result, i;
2093
2094         result = 0;
2095         subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
2096
2097         /*
2098          * Fewer than 6 input packets available, which is not enough to
2099          * feed the pipeline
2100          */
2101         if (unlikely(n_pkts < 6)) {
2102                 struct rte_sched_subport *subports[5];
2103                 struct rte_mbuf **q_base[5];
2104                 uint32_t q[5];
2105
2106                 /* Prefetch the mbuf structure of each packet */
2107                 for (i = 0; i < n_pkts; i++)
2108                         rte_prefetch0(pkts[i]);
2109
2110                 /* Prefetch the subport structure for each packet */
2111                 for (i = 0; i < n_pkts; i++)
2112                         subports[i] = rte_sched_port_subport(port, pkts[i]);
2113
2114                 /* Prefetch the queue structure for each queue */
2115                 for (i = 0; i < n_pkts; i++)
2116                         q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
2117                                         pkts[i], subport_qmask);
2118
2119                 /* Prefetch the write pointer location of each queue */
2120                 for (i = 0; i < n_pkts; i++) {
2121                         q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
2122                         rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
2123                                 q[i], q_base[i]);
2124                 }
2125
2126                 /* Write each packet to its queue */
2127                 for (i = 0; i < n_pkts; i++)
2128                         result += rte_sched_port_enqueue_qwa(port, subports[i],
2129                                                 q[i], q_base[i], pkts[i]);
2130
2131                 return result;
2132         }
2133
2134         /* Feed the first 3 stages of the pipeline (6 packets needed) */
2135         pkt20 = pkts[0];
2136         pkt21 = pkts[1];
2137         rte_prefetch0(pkt20);
2138         rte_prefetch0(pkt21);
2139
2140         pkt10 = pkts[2];
2141         pkt11 = pkts[3];
2142         rte_prefetch0(pkt10);
2143         rte_prefetch0(pkt11);
2144
2145         subport20 = rte_sched_port_subport(port, pkt20);
2146         subport21 = rte_sched_port_subport(port, pkt21);
2147         q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
2148                         pkt20, subport_qmask);
2149         q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
2150                         pkt21, subport_qmask);
2151
2152         pkt00 = pkts[4];
2153         pkt01 = pkts[5];
2154         rte_prefetch0(pkt00);
2155         rte_prefetch0(pkt01);
2156
2157         subport10 = rte_sched_port_subport(port, pkt10);
2158         subport11 = rte_sched_port_subport(port, pkt11);
2159         q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2160                         pkt10, subport_qmask);
2161         q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2162                         pkt11, subport_qmask);
2163
2164         q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2165         q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2166         rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2167         rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2168
2169         /* Run the pipeline */
2170         for (i = 6; i < (n_pkts & (~1)); i += 2) {
2171                 /* Propagate stage inputs */
2172                 pkt30 = pkt20;
2173                 pkt31 = pkt21;
2174                 pkt20 = pkt10;
2175                 pkt21 = pkt11;
2176                 pkt10 = pkt00;
2177                 pkt11 = pkt01;
2178                 q30 = q20;
2179                 q31 = q21;
2180                 q20 = q10;
2181                 q21 = q11;
2182                 subport30 = subport20;
2183                 subport31 = subport21;
2184                 subport20 = subport10;
2185                 subport21 = subport11;
2186                 q30_base = q20_base;
2187                 q31_base = q21_base;
2188
2189                 /* Stage 0: Get packets in */
2190                 pkt00 = pkts[i];
2191                 pkt01 = pkts[i + 1];
2192                 rte_prefetch0(pkt00);
2193                 rte_prefetch0(pkt01);
2194
2195                 /* Stage 1: Prefetch subport and queue structure storing queue pointers */
2196                 subport10 = rte_sched_port_subport(port, pkt10);
2197                 subport11 = rte_sched_port_subport(port, pkt11);
2198                 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2199                                 pkt10, subport_qmask);
2200                 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2201                                 pkt11, subport_qmask);
2202
2203                 /* Stage 2: Prefetch queue write location */
2204                 q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2205                 q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2206                 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2207                 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2208
2209                 /* Stage 3: Write packet to queue and activate queue */
2210                 r30 = rte_sched_port_enqueue_qwa(port, subport30,
2211                                 q30, q30_base, pkt30);
2212                 r31 = rte_sched_port_enqueue_qwa(port, subport31,
2213                                 q31, q31_base, pkt31);
2214                 result += r30 + r31;
2215         }
2216
2217         /*
2218          * Drain the pipeline (exactly 6 packets).
2219          * Handle the last packet in the case
2220          * of an odd number of input packets.
2221          */
2222         pkt_last = pkts[n_pkts - 1];
2223         rte_prefetch0(pkt_last);
2224
2225         subport00 = rte_sched_port_subport(port, pkt00);
2226         subport01 = rte_sched_port_subport(port, pkt01);
2227         q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
2228                         pkt00, subport_qmask);
2229         q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
2230                         pkt01, subport_qmask);
2231
2232         q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
2233         q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
2234         rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
2235         rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
2236
2237         r20 = rte_sched_port_enqueue_qwa(port, subport20,
2238                         q20, q20_base, pkt20);
2239         r21 = rte_sched_port_enqueue_qwa(port, subport21,
2240                         q21, q21_base, pkt21);
2241         result += r20 + r21;
2242
2243         subport_last = rte_sched_port_subport(port, pkt_last);
2244         q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
2245                                 pkt_last, subport_qmask);
2246
2247         q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
2248         q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
2249         rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
2250         rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
2251
2252         r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
2253                         q10_base, pkt10);
2254         r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
2255                         q11_base, pkt11);
2256         result += r10 + r11;
2257
2258         q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
2259         rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
2260                 q_last, q_last_base);
2261
2262         r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
2263                         q00_base, pkt00);
2264         r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
2265                         q01_base, pkt01);
2266         result += r00 + r01;
2267
2268         if (n_pkts & 1) {
2269                 r_last = rte_sched_port_enqueue_qwa(port, subport_last,
2270                                         q_last, q_last_base, pkt_last);
2271                 result += r_last;
2272         }
2273
2274         return result;
2275 }
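
/*
 * Illustrative sketch (not part of the library): a run-to-completion TX core
 * typically pairs the enqueue above with rte_sched_port_dequeue(). BURST,
 * ring, mbufs and tx_port_id are hypothetical application state.
 *
 *	n_rx = rte_ring_dequeue_burst(ring, (void **)mbufs, BURST, NULL);
 *	rte_sched_port_enqueue(port, mbufs, n_rx);
 *	n_tx = rte_sched_port_dequeue(port, mbufs, BURST);
 *	rte_eth_tx_burst(tx_port_id, 0, mbufs, n_tx);
 */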
2276
2277 #ifndef RTE_SCHED_SUBPORT_TC_OV
2278
2279 static inline void
2280 grinder_credits_update(struct rte_sched_port *port,
2281         struct rte_sched_subport *subport, uint32_t pos)
2282 {
2283         struct rte_sched_grinder *grinder = subport->grinder + pos;
2284         struct rte_sched_pipe *pipe = grinder->pipe;
2285         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2286         struct rte_sched_subport_profile *sp = grinder->subport_params;
2287         uint64_t n_periods;
2288         uint32_t i;
2289
2290         /* Subport TB */
2291         n_periods = (port->time - subport->tb_time) / sp->tb_period;
2292         subport->tb_credits += n_periods * sp->tb_credits_per_period;
2293         subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2294         subport->tb_time += n_periods * sp->tb_period;
2295
2296         /* Pipe TB */
2297         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2298         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2299         pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2300         pipe->tb_time += n_periods * params->tb_period;
2301
2302         /* Subport TCs */
2303         if (unlikely(port->time >= subport->tc_time)) {
2304                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2305                         subport->tc_credits[i] = sp->tc_credits_per_period[i];
2306
2307                 subport->tc_time = port->time + sp->tc_period;
2308         }
2309
2310         /* Pipe TCs */
2311         if (unlikely(port->time >= pipe->tc_time)) {
2312                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2313                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2314
2315                 pipe->tc_time = port->time + params->tc_period;
2316         }
2317 }
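
/*
 * Worked example of the periodic token bucket refill above, with
 * hypothetical parameters tb_period = 10 and tb_credits_per_period = 125:
 * if port->time is 37 units past tb_time, then n_periods = 3, so 375
 * credits are added (saturating at tb_size) and tb_time advances by 30,
 * carrying the 7-unit remainder into the next refill.
 */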
2318
2319 #else
2320
2321 static inline uint64_t
2322 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2323         struct rte_sched_subport *subport, uint32_t pos)
2324 {
2325         struct rte_sched_grinder *grinder = subport->grinder + pos;
2326         struct rte_sched_subport_profile *sp = grinder->subport_params;
2327         uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2328         uint64_t tc_consumption = 0, tc_ov_consumption_max;
2329         uint64_t tc_ov_wm = subport->tc_ov_wm;
2330         uint32_t i;
2331
2332         if (subport->tc_ov == 0)
2333                 return subport->tc_ov_wm_max;
2334
2335         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2336                 tc_ov_consumption[i] = sp->tc_credits_per_period[i]
2337                                         -  subport->tc_credits[i];
2338                 tc_consumption += tc_ov_consumption[i];
2339         }
2340
2341         tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2342                 sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2343                 subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2344
2345         tc_ov_consumption_max =
2346                 sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2347                 tc_consumption;
2348
2349         if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2350                 (tc_ov_consumption_max - port->mtu)) {
2351                 tc_ov_wm  -= tc_ov_wm >> 7;
2352                 if (tc_ov_wm < subport->tc_ov_wm_min)
2353                         tc_ov_wm = subport->tc_ov_wm_min;
2354
2355                 return tc_ov_wm;
2356         }
2357
2358         tc_ov_wm += (tc_ov_wm >> 7) + 1;
2359         if (tc_ov_wm > subport->tc_ov_wm_max)
2360                 tc_ov_wm = subport->tc_ov_wm_max;
2361
2362         return tc_ov_wm;
2363 }
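
/*
 * The watermark above adapts in steps of roughly 1/128 (the >> 7): when
 * best-effort consumption exceeds the capacity left unused by the higher
 * traffic classes, it shrinks multiplicatively, otherwise it grows.
 * E.g. for tc_ov_wm = 12800: a congested period drops it by
 * 12800 >> 7 = 100 to 12700, an uncongested one raises it by
 * (12800 >> 7) + 1 = 101, clamped to [tc_ov_wm_min, tc_ov_wm_max].
 */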
2364
2365 static inline void
2366 grinder_credits_update(struct rte_sched_port *port,
2367         struct rte_sched_subport *subport, uint32_t pos)
2368 {
2369         struct rte_sched_grinder *grinder = subport->grinder + pos;
2370         struct rte_sched_pipe *pipe = grinder->pipe;
2371         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2372         struct rte_sched_subport_profile *sp = grinder->subport_params;
2373         uint64_t n_periods;
2374         uint32_t i;
2375
2376         /* Subport TB */
2377         n_periods = (port->time - subport->tb_time) / sp->tb_period;
2378         subport->tb_credits += n_periods * sp->tb_credits_per_period;
2379         subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2380         subport->tb_time += n_periods * sp->tb_period;
2381
2382         /* Pipe TB */
2383         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2384         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2385         pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2386         pipe->tb_time += n_periods * params->tb_period;
2387
2388         /* Subport TCs */
2389         if (unlikely(port->time >= subport->tc_time)) {
2390                 subport->tc_ov_wm =
2391                         grinder_tc_ov_credits_update(port, subport, pos);
2392
2393                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2394                         subport->tc_credits[i] = sp->tc_credits_per_period[i];
2395
2396                 subport->tc_time = port->time + sp->tc_period;
2397                 subport->tc_ov_period_id++;
2398         }
2399
2400         /* Pipe TCs */
2401         if (unlikely(port->time >= pipe->tc_time)) {
2402                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2403                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2404                 pipe->tc_time = port->time + params->tc_period;
2405         }
2406
2407         /* Pipe TCs - Oversubscription */
2408         if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2409                 pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2410
2411                 pipe->tc_ov_period_id = subport->tc_ov_period_id;
2412         }
2413 }
2414
2415 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2416
2417
2418 #ifndef RTE_SCHED_SUBPORT_TC_OV
2419
2420 static inline int
2421 grinder_credits_check(struct rte_sched_port *port,
2422         struct rte_sched_subport *subport, uint32_t pos)
2423 {
2424         struct rte_sched_grinder *grinder = subport->grinder + pos;
2425         struct rte_sched_pipe *pipe = grinder->pipe;
2426         struct rte_mbuf *pkt = grinder->pkt;
2427         uint32_t tc_index = grinder->tc_index;
2428         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2429         uint64_t subport_tb_credits = subport->tb_credits;
2430         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2431         uint64_t pipe_tb_credits = pipe->tb_credits;
2432         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2433         int enough_credits;
2434
2435         /* Check queue credits */
2436         enough_credits = (pkt_len <= subport_tb_credits) &&
2437                 (pkt_len <= subport_tc_credits) &&
2438                 (pkt_len <= pipe_tb_credits) &&
2439                 (pkt_len <= pipe_tc_credits);
2440
2441         if (!enough_credits)
2442                 return 0;
2443
2444         /* Update port credits */
2445         subport->tb_credits -= pkt_len;
2446         subport->tc_credits[tc_index] -= pkt_len;
2447         pipe->tb_credits -= pkt_len;
2448         pipe->tc_credits[tc_index] -= pkt_len;
2449
2450         return 1;
2451 }
2452
2453 #else
2454
2455 static inline int
2456 grinder_credits_check(struct rte_sched_port *port,
2457         struct rte_sched_subport *subport, uint32_t pos)
2458 {
2459         struct rte_sched_grinder *grinder = subport->grinder + pos;
2460         struct rte_sched_pipe *pipe = grinder->pipe;
2461         struct rte_mbuf *pkt = grinder->pkt;
2462         uint32_t tc_index = grinder->tc_index;
2463         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2464         uint64_t subport_tb_credits = subport->tb_credits;
2465         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2466         uint64_t pipe_tb_credits = pipe->tb_credits;
2467         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2468         uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2469         uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2470         uint64_t pipe_tc_ov_credits;
2471         uint32_t i;
2472         int enough_credits;
2473
2474         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2475                 pipe_tc_ov_mask1[i] = ~0LLU;
2476
2477         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2478         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
2479         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2480
2481         /* Check pipe and subport credits */
2482         enough_credits = (pkt_len <= subport_tb_credits) &&
2483                 (pkt_len <= subport_tc_credits) &&
2484                 (pkt_len <= pipe_tb_credits) &&
2485                 (pkt_len <= pipe_tc_credits) &&
2486                 (pkt_len <= pipe_tc_ov_credits);
2487
2488         if (!enough_credits)
2489                 return 0;
2490
2491         /* Update pipe and subport credits */
2492         subport->tb_credits -= pkt_len;
2493         subport->tc_credits[tc_index] -= pkt_len;
2494         pipe->tb_credits -= pkt_len;
2495         pipe->tc_credits[tc_index] -= pkt_len;
2496         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2497
2498         return 1;
2499 }
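
/*
 * Worked example of the branch-free masking above: for the best-effort
 * traffic class, pipe_tc_ov_credits is the real pipe->tc_ov_credits and
 * mask2 is ~0, so the oversubscription budget is both checked and debited.
 * For any strict-priority class, pipe_tc_ov_credits is ~0 (that check can
 * never fail) and mask2 is 0, so pkt_len & 0 debits nothing.
 */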
2500
2501 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2502
2503
2504 static inline int
2505 grinder_schedule(struct rte_sched_port *port,
2506         struct rte_sched_subport *subport, uint32_t pos)
2507 {
2508         struct rte_sched_grinder *grinder = subport->grinder + pos;
2509         struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2510         uint32_t qindex = grinder->qindex[grinder->qpos];
2511         struct rte_mbuf *pkt = grinder->pkt;
2512         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2513         uint32_t be_tc_active;
2514
2515         if (!grinder_credits_check(port, subport, pos))
2516                 return 0;
2517
2518         /* Advance port time */
2519         port->time += pkt_len;
2520
2521         /* Send packet */
2522         port->pkts_out[port->n_pkts_out++] = pkt;
2523         queue->qr++;
2524
2525         be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2526         grinder->wrr_tokens[grinder->qpos] +=
2527                 (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2528
2529         if (queue->qr == queue->qw) {
2530                 rte_bitmap_clear(subport->bmp, qindex);
2531                 grinder->qmask &= ~(1 << grinder->qpos);
2532                 if (be_tc_active)
2533                         grinder->wrr_mask[grinder->qpos] = 0;
2534
2535                 rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
2536         }
2537
2538         rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
2539
2540         /* Reset pipe loop detection */
2541         subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2542         grinder->productive = 1;
2543
2544         return 1;
2545 }
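
/*
 * be_tc_active above is the same branch-free trick: all-ones for the
 * best-effort TC and zero otherwise, so the WRR token charge
 * (pkt_len * wrr_cost) is applied in full for best-effort queues and masked
 * to zero for strict-priority queues without a branch in the scheduling
 * hot path.
 */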
2546
2547 #ifdef SCHED_VECTOR_SSE4
2548
2549 static inline int
2550 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2551 {
2552         __m128i index = _mm_set1_epi32(base_pipe);
2553         __m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
2554         __m128i res = _mm_cmpeq_epi32(pipes, index);
2555
2556         pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
2557         pipes = _mm_cmpeq_epi32(pipes, index);
2558         res = _mm_or_si128(res, pipes);
2559
2560         if (_mm_testz_si128(res, res))
2561                 return 0;
2562
2563         return 1;
2564 }
2565
2566 #elif defined(SCHED_VECTOR_NEON)
2567
2568 static inline int
2569 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2570 {
2571         uint32x4_t index, pipes;
2572         uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;
2573
2574         index = vmovq_n_u32(base_pipe);
2575         pipes = vld1q_u32(pos);
2576         if (!vminvq_u32(veorq_u32(pipes, index)))
2577                 return 1;
2578
2579         pipes = vld1q_u32(pos + 4);
2580         if (!vminvq_u32(veorq_u32(pipes, index)))
2581                 return 1;
2582
2583         return 0;
2584 }
2585
2586 #else
2587
2588 static inline int
2589 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2590 {
2591         uint32_t i;
2592
2593         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2594                 if (subport->grinder_base_bmp_pos[i] == base_pipe)
2595                         return 1;
2596         }
2597
2598         return 0;
2599 }
2600
2601 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2602
2603 static inline void
2604 grinder_pcache_populate(struct rte_sched_subport *subport,
2605         uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2606 {
2607         struct rte_sched_grinder *grinder = subport->grinder + pos;
2608         uint16_t w[4];
2609
2610         grinder->pcache_w = 0;
2611         grinder->pcache_r = 0;
2612
2613         w[0] = (uint16_t) bmp_slab;
2614         w[1] = (uint16_t) (bmp_slab >> 16);
2615         w[2] = (uint16_t) (bmp_slab >> 32);
2616         w[3] = (uint16_t) (bmp_slab >> 48);
2617
2618         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2619         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2620         grinder->pcache_w += (w[0] != 0);
2621
2622         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2623         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2624         grinder->pcache_w += (w[1] != 0);
2625
2626         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2627         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2628         grinder->pcache_w += (w[2] != 0);
2629
2630         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2631         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2632         grinder->pcache_w += (w[3] != 0);
2633 }
2634
2635 static inline void
2636 grinder_tccache_populate(struct rte_sched_subport *subport,
2637         uint32_t pos, uint32_t qindex, uint16_t qmask)
2638 {
2639         struct rte_sched_grinder *grinder = subport->grinder + pos;
2640         uint8_t b, i;
2641
2642         grinder->tccache_w = 0;
2643         grinder->tccache_r = 0;
2644
2645         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2646                 b = (uint8_t) ((qmask >> i) & 0x1);
2647                 grinder->tccache_qmask[grinder->tccache_w] = b;
2648                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2649                 grinder->tccache_w += (b != 0);
2650         }
2651
2652         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2653         grinder->tccache_qmask[grinder->tccache_w] = b;
2654         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2655                 RTE_SCHED_TRAFFIC_CLASS_BE;
2656         grinder->tccache_w += (b != 0);
2657 }
2658
2659 static inline int
2660 grinder_next_tc(struct rte_sched_port *port,
2661         struct rte_sched_subport *subport, uint32_t pos)
2662 {
2663         struct rte_sched_grinder *grinder = subport->grinder + pos;
2664         struct rte_mbuf **qbase;
2665         uint32_t qindex;
2666         uint16_t qsize;
2667
2668         if (grinder->tccache_r == grinder->tccache_w)
2669                 return 0;
2670
2671         qindex = grinder->tccache_qindex[grinder->tccache_r];
2672         qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2673         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2674
2675         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2676         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2677         grinder->qsize = qsize;
2678
2679         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2680                 grinder->queue[0] = subport->queue + qindex;
2681                 grinder->qbase[0] = qbase;
2682                 grinder->qindex[0] = qindex;
2683                 grinder->tccache_r++;
2684
2685                 return 1;
2686         }
2687
2688         grinder->queue[0] = subport->queue + qindex;
2689         grinder->queue[1] = subport->queue + qindex + 1;
2690         grinder->queue[2] = subport->queue + qindex + 2;
2691         grinder->queue[3] = subport->queue + qindex + 3;
2692
2693         grinder->qbase[0] = qbase;
2694         grinder->qbase[1] = qbase + qsize;
2695         grinder->qbase[2] = qbase + 2 * qsize;
2696         grinder->qbase[3] = qbase + 3 * qsize;
2697
2698         grinder->qindex[0] = qindex;
2699         grinder->qindex[1] = qindex + 1;
2700         grinder->qindex[2] = qindex + 2;
2701         grinder->qindex[3] = qindex + 3;
2702
2703         grinder->tccache_r++;
2704         return 1;
2705 }
2706
2707 static inline int
2708 grinder_next_pipe(struct rte_sched_port *port,
2709         struct rte_sched_subport *subport, uint32_t pos)
2710 {
2711         struct rte_sched_grinder *grinder = subport->grinder + pos;
2712         uint32_t pipe_qindex;
2713         uint16_t pipe_qmask;
2714
2715         if (grinder->pcache_r < grinder->pcache_w) {
2716                 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2717                 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2718                 grinder->pcache_r++;
2719         } else {
2720                 uint64_t bmp_slab = 0;
2721                 uint32_t bmp_pos = 0;
2722
2723                 /* Get another non-empty pipe group */
2724                 if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2725                         return 0;
2726
2727 #ifdef RTE_SCHED_DEBUG
2728                 debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2729 #endif
2730
2731                 /* Return if pipe group already in one of the other grinders */
2732                 subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2733                 if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2734                         return 0;
2735
2736                 subport->grinder_base_bmp_pos[pos] = bmp_pos;
2737
2738                 /* Install new pipe group into grinder's pipe cache */
2739                 grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2740
2741                 pipe_qmask = grinder->pcache_qmask[0];
2742                 pipe_qindex = grinder->pcache_qindex[0];
2743                 grinder->pcache_r = 1;
2744         }
2745
2746         /* Install new pipe in the grinder */
2747         grinder->pindex = pipe_qindex >> 4;
2748         grinder->subport = subport;
2749         grinder->pipe = subport->pipe + grinder->pindex;
2750         grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2751         grinder->productive = 0;
2752
2753         grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2754         grinder_next_tc(port, subport, pos);
2755
2756         /* Check for pipe exhaustion */
2757         if (grinder->pindex == subport->pipe_loop) {
2758                 subport->pipe_exhaustion = 1;
2759                 subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2760         }
2761
2762         return 1;
2763 }
2764
2765
2766 static inline void
2767 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2768 {
2769         struct rte_sched_grinder *grinder = subport->grinder + pos;
2770         struct rte_sched_pipe *pipe = grinder->pipe;
2771         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2772         uint32_t qmask = grinder->qmask;
2773
2774         grinder->wrr_tokens[0] =
2775                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2776         grinder->wrr_tokens[1] =
2777                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2778         grinder->wrr_tokens[2] =
2779                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2780         grinder->wrr_tokens[3] =
2781                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2782
2783         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2784         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2785         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2786         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2787
2788         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2789         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2790         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2791         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2792 }
2793
2794 static inline void
2795 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2796 {
2797         struct rte_sched_grinder *grinder = subport->grinder + pos;
2798         struct rte_sched_pipe *pipe = grinder->pipe;
2799
2800         pipe->wrr_tokens[0] =
2801                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2802                                 RTE_SCHED_WRR_SHIFT;
2803         pipe->wrr_tokens[1] =
2804                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2805                                 RTE_SCHED_WRR_SHIFT;
2806         pipe->wrr_tokens[2] =
2807                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2808                                 RTE_SCHED_WRR_SHIFT;
2809         pipe->wrr_tokens[3] =
2810                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2811                                 RTE_SCHED_WRR_SHIFT;
2812 }
2813
2814 static inline void
2815 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2816 {
2817         struct rte_sched_grinder *grinder = subport->grinder + pos;
2818         uint16_t wrr_tokens_min;
2819
2820         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2821         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2822         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2823         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2824
2825         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2826         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2827
2828         grinder->wrr_tokens[0] -= wrr_tokens_min;
2829         grinder->wrr_tokens[1] -= wrr_tokens_min;
2830         grinder->wrr_tokens[2] -= wrr_tokens_min;
2831         grinder->wrr_tokens[3] -= wrr_tokens_min;
2832 }
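
/*
 * Worked example of one WRR selection round (hypothetical token values):
 * with wrr_tokens = {48, 16, 32, 16} after masking, rte_min_pos_4_u16()
 * returns qpos = 1 and subtracting the minimum leaves {32, 0, 16, 0}, so
 * the queue with the lowest accumulated weighted cost is served next and
 * the relative token distances between active queues are preserved.
 */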
2833
2834
2835 #define grinder_evict(subport, pos)
2836
2837 static inline void
2838 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2839 {
2840         struct rte_sched_grinder *grinder = subport->grinder + pos;
2841
2842         rte_prefetch0(grinder->pipe);
2843         rte_prefetch0(grinder->queue[0]);
2844 }
2845
2846 static inline void
2847 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2848 {
2849         struct rte_sched_grinder *grinder = subport->grinder + pos;
2850         uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2851
2852         qsize = grinder->qsize;
2853         grinder->qpos = 0;
2854
2855         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2856                 qr[0] = grinder->queue[0]->qr & (qsize - 1);
2857
2858                 rte_prefetch0(grinder->qbase[0] + qr[0]);
2859                 return;
2860         }
2861
2862         qr[0] = grinder->queue[0]->qr & (qsize - 1);
2863         qr[1] = grinder->queue[1]->qr & (qsize - 1);
2864         qr[2] = grinder->queue[2]->qr & (qsize - 1);
2865         qr[3] = grinder->queue[3]->qr & (qsize - 1);
2866
2867         rte_prefetch0(grinder->qbase[0] + qr[0]);
2868         rte_prefetch0(grinder->qbase[1] + qr[1]);
2869
2870         grinder_wrr_load(subport, pos);
2871         grinder_wrr(subport, pos);
2872
2873         rte_prefetch0(grinder->qbase[2] + qr[2]);
2874         rte_prefetch0(grinder->qbase[3] + qr[3]);
2875 }
2876
2877 static inline void
2878 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2879 {
2880         struct rte_sched_grinder *grinder = subport->grinder + pos;
2881         uint32_t qpos = grinder->qpos;
2882         struct rte_mbuf **qbase = grinder->qbase[qpos];
2883         uint16_t qsize = grinder->qsize;
2884         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2885
2886         grinder->pkt = qbase[qr];
2887         rte_prefetch0(grinder->pkt);
2888
2889         if (unlikely((qr & 0x7) == 7)) {
2890                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2891
2892                 rte_prefetch0(qbase + qr_next);
2893         }
2894 }
2895
static inline uint32_t
grinder_handle(struct rte_sched_port *port,
        struct rte_sched_subport *subport, uint32_t pos)
{
        struct rte_sched_grinder *grinder = subport->grinder + pos;

        switch (grinder->state) {
        case e_GRINDER_PREFETCH_PIPE:
        {
                if (grinder_next_pipe(port, subport, pos)) {
                        grinder_prefetch_pipe(subport, pos);
                        subport->busy_grinders++;

                        grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
                        return 0;
                }

                return 0;
        }

        case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
        {
                struct rte_sched_pipe *pipe = grinder->pipe;

                grinder->pipe_params = subport->pipe_profiles + pipe->profile;
                grinder->subport_params = port->subport_profiles +
                                                subport->profile;

                grinder_prefetch_tc_queue_arrays(subport, pos);
                grinder_credits_update(port, subport, pos);

                grinder->state = e_GRINDER_PREFETCH_MBUF;
                return 0;
        }

        case e_GRINDER_PREFETCH_MBUF:
        {
                grinder_prefetch_mbuf(subport, pos);

                grinder->state = e_GRINDER_READ_MBUF;
                return 0;
        }

        case e_GRINDER_READ_MBUF:
        {
                uint32_t wrr_active, result = 0;

                result = grinder_schedule(port, subport, pos);

                wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);

                /* Look for the next packet within the same TC */
                if (result && grinder->qmask) {
                        if (wrr_active)
                                grinder_wrr(subport, pos);

                        grinder_prefetch_mbuf(subport, pos);

                        return 1;
                }

                if (wrr_active)
                        grinder_wrr_store(subport, pos);

                /* Look for another active TC within the same pipe */
                if (grinder_next_tc(port, subport, pos)) {
                        grinder_prefetch_tc_queue_arrays(subport, pos);

                        grinder->state = e_GRINDER_PREFETCH_MBUF;
                        return result;
                }

                if (grinder->productive == 0 &&
                    subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
                        subport->pipe_loop = grinder->pindex;

                grinder_evict(subport, pos);

                /* Look for another active pipe */
                if (grinder_next_pipe(port, subport, pos)) {
                        grinder_prefetch_pipe(subport, pos);

                        grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
                        return result;
                }

                /* No active pipe found */
                subport->busy_grinders--;

                grinder->state = e_GRINDER_PREFETCH_PIPE;
                return result;
        }

        default:
                rte_panic("Algorithmic error (invalid state)\n");
                return 0;
        }
}

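/*
 * Convert elapsed TSC cycles into elapsed "byte time". The division by
 * cycles_per_byte is performed with rte_reciprocal_divide() (multiply
 * by a precomputed inverse) to keep divisions off the fast path, and
 * the RTE_SCHED_TIME_SHIFT scaling gives the cycles/byte ratio 8
 * fixed-point fraction bits. port->time_cpu_cycles is advanced by the
 * cycle equivalent of the whole bytes actually accounted, so the
 * rounding remainder carries over to the next resync instead of
 * accumulating as drift. If the TSC appears to move backwards
 * (wrap-around), the stored cycle reference is reset to zero and the
 * post-wrap count is used as the elapsed time.
 */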
static inline void
rte_sched_port_time_resync(struct rte_sched_port *port)
{
        uint64_t cycles = rte_get_tsc_cycles();
        uint64_t cycles_diff;
        uint64_t bytes_diff;
        uint32_t i;

        if (cycles < port->time_cpu_cycles)
                port->time_cpu_cycles = 0;

        cycles_diff = cycles - port->time_cpu_cycles;
        /* Compute elapsed time in bytes */
        bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
                                           port->inv_cycles_per_byte);

        /* Advance port time */
        port->time_cpu_cycles +=
                (bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT;
        port->time_cpu_bytes += bytes_diff;
        if (port->time < port->time_cpu_bytes)
                port->time = port->time_cpu_bytes;

        /* Reset pipe loop detection */
        for (i = 0; i < port->n_subports_per_port; i++)
                port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
}

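/*
 * An "exception" tells the dequeue loop to give up on the current
 * subport: either this is the second pass over the grinders and none
 * of them is busy (no active pipes left), or pipe exhaustion was
 * flagged. The exhaustion flag is cleared on every call, i.e. it is
 * consumed here.
 */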
static inline int
rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
{
        int exceptions;

        /* Check the exception conditions */
        exceptions = (second_pass && subport->busy_grinders == 0) ||
                (subport->pipe_exhaustion == 1);

        /* Clear exception flags */
        subport->pipe_exhaustion = 0;

        return exceptions;
}

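/*
 * Main dequeue entry point: resync port time, then round-robin over
 * the grinders of the current subport, advancing each one by a single
 * state-machine step per iteration. The loop terminates when n_pkts
 * packets have been scheduled, or when every subport has raised an
 * exception (nothing left to send), whichever comes first.
 */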
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
{
        struct rte_sched_subport *subport;
        uint32_t subport_id = port->subport_id;
        uint32_t i, n_subports = 0, count;

        port->pkts_out = pkts;
        port->n_pkts_out = 0;

        rte_sched_port_time_resync(port);

        /* Take each grinder one step further, round-robin */
        for (i = 0, count = 0; ; i++)  {
                subport = port->subports[subport_id];

                count += grinder_handle(port, subport,
                                i & (RTE_SCHED_PORT_N_GRINDERS - 1));

                if (count == n_pkts) {
                        subport_id++;

                        if (subport_id == port->n_subports_per_port)
                                subport_id = 0;

                        port->subport_id = subport_id;
                        break;
                }

                if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
                        i = 0;
                        subport_id++;
                        n_subports++;
                }

                if (subport_id == port->n_subports_per_port)
                        subport_id = 0;

                if (n_subports == port->n_subports_per_port) {
                        port->subport_id = subport_id;
                        break;
                }
        }

        return count;
}
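
/*
 * Usage sketch (illustrative, not part of this file): a typical
 * run-to-completion loop pairs rte_sched_port_enqueue() with the
 * dequeue call above and then transmits the scheduled packets, e.g.:
 *
 *	nb_sched = rte_sched_port_dequeue(port, pkts, BURST);
 *	if (nb_sched)
 *		rte_eth_tx_burst(tx_port_id, tx_queue_id, pkts, nb_sched);
 *
 * BURST, tx_port_id and tx_queue_id are application-defined.
 */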