/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_bitmap.h>
#include <rte_reciprocal.h>

#include "rte_sched.h"
#include "rte_sched_common.h"
#include "rte_approx.h"

#ifdef __INTEL_COMPILER
#pragma warning(disable:2259) /* conversion may lose significant bits */
#endif

#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>

#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
#elif defined(__ARM_NEON)
#define SCHED_VECTOR_NEON
#endif

#endif

#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
#define RTE_SCHED_WRR_SHIFT                   3
#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

/* Scaling for cycles_per_byte calculation
 * Chosen so that minimum rate is 480 bit/sec
 */
#define RTE_SCHED_TIME_SHIFT                  8
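
/*
 * e.g. (illustrative numbers only): with a 2.0 GHz TSC and a port rate of
 * 1.25e9 bytes/s, cycles_per_byte = (2e9 << 8) / 1.25e9 = 409, i.e. the
 * value 1.6 cycles/byte carried with 8 fractional bits.
 */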

struct rte_sched_pipe_profile {
        /* Token bucket (TB) */
        uint64_t tb_period;
        uint64_t tb_credits_per_period;
        uint64_t tb_size;

        /* Pipe traffic classes */
        uint64_t tc_period;
        uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t tc_ov_weight;

        /* Pipe best-effort traffic class queues */
        uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_pipe {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint64_t tb_credits;

        /* Pipe profile and flags */
        uint32_t profile;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /* Weighted Round Robin (WRR) */
        uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

        /* TC oversubscription */
        uint64_t tc_ov_credits;
        uint8_t tc_ov_period_id;
} __rte_cache_aligned;

struct rte_sched_queue {
        uint16_t qw;
        uint16_t qr;
};

struct rte_sched_queue_extra {
        struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_RED
        struct rte_red red;
#endif
};

enum grinder_state {
        e_GRINDER_PREFETCH_PIPE = 0,
        e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
        e_GRINDER_PREFETCH_MBUF,
        e_GRINDER_READ_MBUF
};

struct rte_sched_subport_profile {
        /* Token bucket (TB) */
        uint64_t tb_period;
        uint64_t tb_credits_per_period;
        uint64_t tb_size;

        uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint64_t tc_period;
};

struct rte_sched_grinder {
        /* Pipe cache */
        uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_w;
        uint32_t pcache_r;

        /* Current pipe */
        enum grinder_state state;
        uint32_t productive;
        uint32_t pindex;
        struct rte_sched_subport *subport;
        struct rte_sched_subport_profile *subport_params;
        struct rte_sched_pipe *pipe;
        struct rte_sched_pipe_profile *pipe_params;

        /* TC cache */
        uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_w;
        uint32_t tccache_r;

        /* Current TC */
        uint32_t tc_index;
        struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
        struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint16_t qsize;
        uint32_t qmask;
        uint32_t qpos;
        struct rte_mbuf *pkt;

        /* WRR */
        uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint64_t tb_credits;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /* TC oversubscription */
        uint64_t tc_ov_wm;
        uint64_t tc_ov_wm_min;
        uint64_t tc_ov_wm_max;
        uint8_t tc_ov_period_id;
        uint8_t tc_ov;
        uint32_t tc_ov_n;
        double tc_ov_rate;

        /* Statistics */
        struct rte_sched_subport_stats stats __rte_cache_aligned;

        /* Subport profile */
        uint32_t profile;
        /* Subport pipes */
        uint32_t n_pipes_per_subport_enabled;
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;

        /* Pipe best-effort TC rate */
        uint64_t pipe_tc_be_rate_max;

        /* Pipe queues size */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

struct rte_sched_port {
        /* User parameters */
        uint32_t n_subports_per_port;
        uint32_t n_pipes_per_subport;
        uint32_t n_pipes_per_subport_log2;
        uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
        uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t n_subport_profiles;
        uint32_t n_max_subport_profiles;
        uint64_t rate;
        uint32_t mtu;
        uint32_t frame_overhead;
        int socket;

        /* Timing */
        uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
        uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
        uint64_t time;                /* Current NIC TX time measured in bytes */
        struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
        uint64_t cycles_per_byte;

        /* Grinders */
        struct rte_mbuf **pkts_out;
        uint32_t n_pkts_out;
        uint32_t subport_id;

        /* Large data structures */
        struct rte_sched_subport_profile *subport_profiles;
        struct rte_sched_subport *subports[0] __rte_cache_aligned;
} __rte_cache_aligned;

enum rte_sched_subport_array {
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};

static inline uint32_t
rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
{
        return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
}

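/*
 * A subport queue index packs (pipe, queue): the low 4 bits select one of
 * the RTE_SCHED_QUEUES_PER_PIPE (16) queues within the pipe, the upper
 * bits select the pipe, so qindex >> 4 is the pipe index.
 */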
static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t pindex = qindex >> 4;
        uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

        return (subport->queue_array + pindex *
                subport->qsize_sum + subport->qsize_add[qpos]);
}

static inline uint16_t
rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
        struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return subport->qsize[tc];
}

static inline uint32_t
rte_sched_port_queues_per_port(struct rte_sched_port *port)
{
        uint32_t n_queues = 0, i;

        for (i = 0; i < port->n_subports_per_port; i++)
                n_queues += rte_sched_subport_pipe_queues(port->subports[i]);

        return n_queues;
}

static inline uint16_t
rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
{
        uint16_t pipe_queue = port->pipe_queue[traffic_class];

        return pipe_queue;
}

static inline uint8_t
rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return pipe_tc;
}

static inline uint8_t
rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return tc_queue;
}

static int
pipe_profile_check(struct rte_sched_pipe_params *params,
        uint64_t rate, uint16_t *qsize)
{
        uint32_t i;

        /* Pipe parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* TB rate: non-zero, not greater than port rate */
        if (params->tb_rate == 0 ||
                params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        /* TB size: non-zero */
        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* TC rate: non-zero if qsize non-zero, less than pipe rate */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
                        (qsize[i] != 0 && (params->tc_rate[i] == 0 ||
                        params->tc_rate[i] > params->tb_rate))) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize or tc_rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for be traffic class rate\n", __func__);
                return -EINVAL;
        }

        /* TC period: non-zero */
        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* Best effort tc oversubscription weight: non-zero */
        if (params->tc_ov_weight == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc ov weight\n", __func__);
                return -EINVAL;
        }

        /* Queue WRR weights: non-zero */
        for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
                if (params->wrr_weights[i] == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for wrr weight\n", __func__);
                        return -EINVAL;
                }
        }

        return 0;
}

static int
subport_profile_check(struct rte_sched_subport_profile_params *params,
        uint64_t rate)
{
        uint32_t i;

        /* Check user parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED, "%s: "
                "Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        if (params->tb_rate == 0 || params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED, "%s: "
                "Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED, "%s: "
                "Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint64_t tc_rate = params->tc_rate[i];

                if (tc_rate == 0 || (tc_rate > params->tb_rate)) {
                        RTE_LOG(ERR, SCHED, "%s: "
                        "Incorrect value for tc rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED, "%s: "
                "Incorrect tc rate(best effort)\n", __func__);
                return -EINVAL;
        }

        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED, "%s: "
                "Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        return 0;
}

static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
        uint32_t i;

        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* socket */
        if (params->socket < 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for socket id\n", __func__);
                return -EINVAL;
        }

        /* rate */
        if (params->rate == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for rate\n", __func__);
                return -EINVAL;
        }

        /* mtu */
        if (params->mtu == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for mtu\n", __func__);
                return -EINVAL;
        }

        /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
        if (params->n_subports_per_port == 0 ||
            params->n_subports_per_port > 1u << 16 ||
            !rte_is_power_of_2(params->n_subports_per_port)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for number of subports\n", __func__);
                return -EINVAL;
        }

        if (params->subport_profiles == NULL ||
                params->n_subport_profiles == 0 ||
                params->n_max_subport_profiles == 0 ||
                params->n_subport_profiles > params->n_max_subport_profiles) {
                RTE_LOG(ERR, SCHED,
                "%s: Incorrect value for subport profiles\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < params->n_subport_profiles; i++) {
                struct rte_sched_subport_profile_params *p =
                                                params->subport_profiles + i;
                int status;

                status = subport_profile_check(p, params->rate);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                        "%s: subport profile check failed(%d)\n",
                        __func__, status);
                        return -EINVAL;
                }
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport == 0 ||
            !rte_is_power_of_2(params->n_pipes_per_subport)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for maximum pipes number\n", __func__);
                return -EINVAL;
        }

        return 0;
}

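/*
 * Byte offsets of the per-subport arrays inside the single contiguous
 * allocation: pipes, queues, queue extras, pipe profiles, bitmap array
 * and the mbuf queue array are laid out back to back, each rounded up
 * to a cache-line boundary. e_RTE_SCHED_SUBPORT_ARRAY_TOTAL returns the
 * total footprint.
 */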
static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
        enum rte_sched_subport_array array)
{
        uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
        uint32_t n_subport_pipe_queues =
                RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

        uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
        uint32_t size_queue =
                n_subport_pipe_queues * sizeof(struct rte_sched_queue);
        uint32_t size_queue_extra
                = n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
        uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
                sizeof(struct rte_sched_pipe_profile);
        uint32_t size_bmp_array =
                rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        uint32_t size_per_pipe_queue_array, size_queue_array;

        uint32_t base, i;

        size_per_pipe_queue_array = 0;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
                        size_per_pipe_queue_array +=
                                params->qsize[i] * sizeof(struct rte_mbuf *);
                else
                        size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
                                params->qsize[i] * sizeof(struct rte_mbuf *);
        }
        size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

        base = 0;

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

        return base;
}

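/*
 * qsize_add[] is a prefix sum of the per-queue sizes within one pipe:
 * entry i is the mbuf-slot offset of queue i from the pipe's queue-array
 * base, and qsize_sum is the per-pipe total (the stride between pipes).
 */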
static void
rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
{
        uint32_t i;

        subport->qsize_add[0] = 0;

        /* Strict priority traffic class */
        for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];

        /* Best-effort traffic class */
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];

        subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
}

static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
        struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

        RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
                "       Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
                "       Traffic classes: period = %"PRIu64",\n"
                "       credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
                ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
                ", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
                "       Best-effort traffic class oversubscription: weight = %hhu\n"
                "       WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
                i,

                /* Token bucket */
                p->tb_period,
                p->tb_credits_per_period,
                p->tb_size,

                /* Traffic classes */
                p->tc_period,
                p->tc_credits_per_period[0],
                p->tc_credits_per_period[1],
                p->tc_credits_per_period[2],
                p->tc_credits_per_period[3],
                p->tc_credits_per_period[4],
                p->tc_credits_per_period[5],
                p->tc_credits_per_period[6],
                p->tc_credits_per_period[7],
                p->tc_credits_per_period[8],
                p->tc_credits_per_period[9],
                p->tc_credits_per_period[10],
                p->tc_credits_per_period[11],
                p->tc_credits_per_period[12],

                /* Best-effort traffic class oversubscription */
                p->tc_ov_weight,

                /* WRR */
                p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}

static void
rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i)
{
        struct rte_sched_subport_profile *p = port->subport_profiles + i;

        RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n"
        "Token bucket: period = %"PRIu64", credits per period = %"PRIu64","
        "size = %"PRIu64"\n"
        "Traffic classes: period = %"PRIu64",\n"
        "credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
        " %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
        " %"PRIu64", %"PRIu64", %"PRIu64"]\n",
        i,

        /* Token bucket */
        p->tb_period,
        p->tb_credits_per_period,
        p->tb_size,

        /* Traffic classes */
        p->tc_period,
        p->tc_credits_per_period[0],
        p->tc_credits_per_period[1],
        p->tc_credits_per_period[2],
        p->tc_credits_per_period[3],
        p->tc_credits_per_period[4],
        p->tc_credits_per_period[5],
        p->tc_credits_per_period[6],
        p->tc_credits_per_period[7],
        p->tc_credits_per_period[8],
        p->tc_credits_per_period[9],
        p->tc_credits_per_period[10],
        p->tc_credits_per_period[11],
        p->tc_credits_per_period[12]);
}

static inline uint64_t
rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
{
        uint64_t time = time_ms;

        time = (time * rate) / 1000;

        return time;
}

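/* rte_sched_time_ms_to_bytes() example: time_ms = 10 at
 * rate = 1,250,000,000 bytes/s (a 10 Gb/s port) returns
 * 12,500,000 bytes of credit per enforcement period.
 */
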
static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
        struct rte_sched_pipe_params *src,
        struct rte_sched_pipe_profile *dst,
        uint64_t rate)
{
        uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint32_t lcd1, lcd2, lcd;
        uint32_t i;

        /* Token Bucket */
        if (src->tb_rate == rate) {
                dst->tb_credits_per_period = 1;
                dst->tb_period = 1;
        } else {
                double tb_rate = (double) src->tb_rate
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
                        &dst->tb_period);
        }
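
        /* e.g. tb_rate = rate / 3 is approximated by a
         * tb_credits_per_period / tb_period ratio that is within
         * RTE_SCHED_TB_RATE_CONFIG_ERR of 1/3.
         */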

        dst->tb_size = src->tb_size;

        /* Traffic Classes */
        dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
                                                rate);

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (subport->qsize[i])
                        dst->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(src->tc_period,
                                        src->tc_rate[i]);

        dst->tc_ov_weight = src->tc_ov_weight;

        /* WRR queues */
        wrr_cost[0] = src->wrr_weights[0];
        wrr_cost[1] = src->wrr_weights[1];
        wrr_cost[2] = src->wrr_weights[2];
        wrr_cost[3] = src->wrr_weights[3];

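        /* Convert weights to per-byte costs: cost[i] = lcm(weights) /
         * weight[i], so a queue with twice the weight pays half the cost
         * per byte served. e.g. weights {1, 2, 4, 8} give costs {8, 4, 2, 1}.
         */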
        lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
        lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
        lcd = rte_get_lcd(lcd1, lcd2);

        wrr_cost[0] = lcd / wrr_cost[0];
        wrr_cost[1] = lcd / wrr_cost[1];
        wrr_cost[2] = lcd / wrr_cost[2];
        wrr_cost[3] = lcd / wrr_cost[3];

        dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
        dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
        dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
        dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}

static void
rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src,
        struct rte_sched_subport_profile *dst,
        uint64_t rate)
{
        uint32_t i;

        /* Token Bucket */
        if (src->tb_rate == rate) {
                dst->tb_credits_per_period = 1;
                dst->tb_period = 1;
        } else {
                double tb_rate = (double) src->tb_rate
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
                        &dst->tb_period);
        }

        dst->tb_size = src->tb_size;

        /* Traffic Classes */
        dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                dst->tc_credits_per_period[i]
                        = rte_sched_time_ms_to_bytes(src->tc_period,
                                src->tc_rate[i]);
}

static void
rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
        struct rte_sched_subport_params *params, uint64_t rate)
{
        uint32_t i;

        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;

                rte_sched_pipe_profile_convert(subport, src, dst, rate);
                rte_sched_port_log_pipe_profile(subport, i);
        }

        subport->pipe_tc_be_rate_max = 0;
        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

                if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
                        subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
        }
}

static void
rte_sched_port_config_subport_profile_table(struct rte_sched_port *port,
        struct rte_sched_port_params *params,
        uint64_t rate)
{
        uint32_t i;

        for (i = 0; i < port->n_subport_profiles; i++) {
                struct rte_sched_subport_profile_params *src
                                = params->subport_profiles + i;
                struct rte_sched_subport_profile *dst
                                = port->subport_profiles + i;

                rte_sched_subport_profile_convert(src, dst, rate);
                rte_sched_port_log_subport_profile(port, i);
        }
}

static int
rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        uint32_t n_max_pipes_per_subport,
        uint64_t rate)
{
        uint32_t i;

        /* Check user parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* qsize: if non-zero, power of 2,
         * no bigger than 32K (due to 16-bit read/write pointers)
         */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint16_t qsize = params->qsize[i];

                if (qsize != 0 && !rte_is_power_of_2(qsize)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport_enabled == 0 ||
            params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
            !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipes number\n", __func__);
                return -EINVAL;
        }

        /* pipe_profiles and n_pipe_profiles */
        if (params->pipe_profiles == NULL ||
            params->n_pipe_profiles == 0 ||
            params->n_max_pipe_profiles == 0 ||
            params->n_pipe_profiles > params->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipe profiles\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < params->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *p = params->pipe_profiles + i;
                int status;

                status = pipe_profile_check(p, rate, &params->qsize[0]);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile check failed(%d)\n", __func__, status);
                        return -EINVAL;
                }
        }

        return 0;
}

uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
        struct rte_sched_subport_params **subport_params)
{
        uint32_t size0 = 0, size1 = 0, i;
        int status;

        status = rte_sched_port_check_params(port_params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler port params check failed (%d)\n",
                        __func__, status);

                return 0;
        }

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                status = rte_sched_subport_check_params(sp,
                                port_params->n_pipes_per_subport,
                                port_params->rate);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Port scheduler subport params check failed (%d)\n",
                                __func__, status);

                        return 0;
                }
        }

        size0 = sizeof(struct rte_sched_port);

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                size1 += rte_sched_subport_get_array_base(sp,
                                        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
        }

        return size0 + size1;
}

struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
        struct rte_sched_port *port = NULL;
        uint32_t size0, size1, size2;
        uint32_t cycles_per_byte;
        uint32_t i, j;
        int status;

        status = rte_sched_port_check_params(params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);
                return NULL;
        }

        size0 = sizeof(struct rte_sched_port);
        size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);
        size2 = params->n_max_subport_profiles *
                sizeof(struct rte_sched_subport_profile);

        /* Allocate memory to store the data structures */
        port = rte_zmalloc_socket("qos_params", size0 + size1,
                                 RTE_CACHE_LINE_SIZE, params->socket);
        if (port == NULL) {
                RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

                return NULL;
        }

        /* Allocate memory to store the subport profile */
        port->subport_profiles = rte_zmalloc_socket("subport_profile", size2,
                                        RTE_CACHE_LINE_SIZE, params->socket);
        if (port->subport_profiles == NULL) {
                RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
                rte_free(port);
                return NULL;
        }

        /* User parameters */
        port->n_subports_per_port = params->n_subports_per_port;
        port->n_subport_profiles = params->n_subport_profiles;
        port->n_max_subport_profiles = params->n_max_subport_profiles;
        port->n_pipes_per_subport = params->n_pipes_per_subport;
        port->n_pipes_per_subport_log2 =
                        __builtin_ctz(params->n_pipes_per_subport);
        port->socket = params->socket;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                port->pipe_queue[i] = i;

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->pipe_tc[i] = j;

                if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->tc_queue[i] = j;

                if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }
        port->rate = params->rate;
        port->mtu = params->mtu + params->frame_overhead;
        port->frame_overhead = params->frame_overhead;

        /* Timing */
        port->time_cpu_cycles = rte_get_tsc_cycles();
        port->time_cpu_bytes = 0;
        port->time = 0;

        /* Subport profile table */
        rte_sched_port_config_subport_profile_table(port, params, port->rate);

        cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
                / params->rate;
        port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
        port->cycles_per_byte = cycles_per_byte;

        /* Grinders */
        port->pkts_out = NULL;
        port->n_pkts_out = 0;
        port->subport_id = 0;

        return port;
}
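
/*
 * A minimal bring-up sketch (illustrative only; these are the public
 * entry points defined in this file, with caller-chosen arguments):
 *
 *      struct rte_sched_port *port = rte_sched_port_config(&port_params);
 *      rte_sched_subport_config(port, subport_id, &subport_params,
 *              subport_profile_id);
 *      rte_sched_pipe_config(port, subport_id, pipe_id, pipe_profile_id);
 */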

static inline void
rte_sched_subport_free(struct rte_sched_port *port,
        struct rte_sched_subport *subport)
{
        uint32_t n_subport_pipe_queues;
        uint32_t qindex;

        if (subport == NULL)
                return;

        n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);

        /* Free enqueued mbufs */
        for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
                struct rte_mbuf **mbufs =
                        rte_sched_subport_pipe_qbase(subport, qindex);
                uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
                if (qsize != 0) {
                        struct rte_sched_queue *queue = subport->queue + qindex;
                        uint16_t qr = queue->qr & (qsize - 1);
                        uint16_t qw = queue->qw & (qsize - 1);

                        for (; qr != qw; qr = (qr + 1) & (qsize - 1))
                                rte_pktmbuf_free(mbufs[qr]);
                }
        }

        rte_free(subport);
}

void
rte_sched_port_free(struct rte_sched_port *port)
{
        uint32_t i;

        /* Check user parameters */
        if (port == NULL)
                return;

        for (i = 0; i < port->n_subports_per_port; i++)
                rte_sched_subport_free(port, port->subports[i]);

        rte_free(port->subport_profiles);
        rte_free(port);
}

static void
rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
{
        uint32_t i;

        for (i = 0; i < n_subports; i++) {
                struct rte_sched_subport *subport = port->subports[i];

                rte_sched_subport_free(port, subport);
        }

        rte_free(port->subport_profiles);
        rte_free(port);
}

int
rte_sched_subport_config(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_params *params,
        uint32_t subport_profile_id)
{
        struct rte_sched_subport *s = NULL;
        uint32_t n_subports = subport_id;
        struct rte_sched_subport_profile *profile;
        uint32_t n_subport_pipe_queues, i;
        uint32_t size0, size1, bmp_mem_size;
        int status;
        int ret;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                ret = -EINVAL;
                goto out;
        }

        if (subport_profile_id >= port->n_max_subport_profiles) {
                RTE_LOG(ERR, SCHED, "%s: "
                        "Subport profile id exceeds the max limit\n",
                        __func__);
                ret = -EINVAL;
                goto out;
        }

        /* Memory is allocated only on the first invocation of the API for a
         * given subport. Subsequent invocations on the same subport only
         * update the subport bandwidth parameters.
         */
        if (port->subports[subport_id] == NULL) {

                status = rte_sched_subport_check_params(params,
                        port->n_pipes_per_subport,
                        port->rate);
                if (status != 0) {
                        RTE_LOG(NOTICE, SCHED,
                                "%s: Port scheduler params check failed (%d)\n",
                                __func__, status);
                        ret = -EINVAL;
                        goto out;
                }

                /* Determine the amount of memory to allocate */
                size0 = sizeof(struct rte_sched_subport);
                size1 = rte_sched_subport_get_array_base(params,
                                        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);

                /* Allocate memory to store the data structures */
                s = rte_zmalloc_socket("subport_params", size0 + size1,
                        RTE_CACHE_LINE_SIZE, port->socket);
                if (s == NULL) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Memory allocation fails\n", __func__);
                        ret = -ENOMEM;
                        goto out;
                }

                n_subports++;

                subport_profile_id = 0;

                /* Port */
                port->subports[subport_id] = s;

                s->tb_time = port->time;

                /* compile time checks */
                RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
                RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
                        (RTE_SCHED_PORT_N_GRINDERS - 1));

                /* User parameters */
                s->n_pipes_per_subport_enabled =
                                params->n_pipes_per_subport_enabled;
                memcpy(s->qsize, params->qsize, sizeof(params->qsize));
                s->n_pipe_profiles = params->n_pipe_profiles;
                s->n_max_pipe_profiles = params->n_max_pipe_profiles;

#ifdef RTE_SCHED_RED
                for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                        uint32_t j;

                        for (j = 0; j < RTE_COLORS; j++) {
                        /* if min/max are both zero, then RED is disabled */
                                if ((params->red_params[i][j].min_th |
                                     params->red_params[i][j].max_th) == 0) {
                                        continue;
                                }

                                if (rte_red_config_init(&s->red_config[i][j],
                                    params->red_params[i][j].wq_log2,
                                    params->red_params[i][j].min_th,
                                    params->red_params[i][j].max_th,
                                    params->red_params[i][j].maxp_inv) != 0) {
                                        RTE_LOG(NOTICE, SCHED,
                                        "%s: RED configuration init fails\n",
                                        __func__);
                                        ret = -EINVAL;
                                        goto out;
                                }
                        }
                }
#endif

                /* Scheduling loop detection */
                s->pipe_loop = RTE_SCHED_PIPE_INVALID;
                s->pipe_exhaustion = 0;

                /* Grinders */
                s->busy_grinders = 0;

                /* Queue base calculation */
                rte_sched_subport_config_qsize(s);

                /* Large data structures */
                s->pipe = (struct rte_sched_pipe *)
                        (s->memory + rte_sched_subport_get_array_base(params,
                        e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
                s->queue = (struct rte_sched_queue *)
                        (s->memory + rte_sched_subport_get_array_base(params,
                        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
                s->queue_extra = (struct rte_sched_queue_extra *)
                        (s->memory + rte_sched_subport_get_array_base(params,
                        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
                s->pipe_profiles = (struct rte_sched_pipe_profile *)
                        (s->memory + rte_sched_subport_get_array_base(params,
                        e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
                s->bmp_array = s->memory + rte_sched_subport_get_array_base(
                                params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
                s->queue_array = (struct rte_mbuf **)
                        (s->memory + rte_sched_subport_get_array_base(params,
                        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));

                /* Pipe profile table */
                rte_sched_subport_config_pipe_profile_table(s, params,
                                                            port->rate);

                /* Bitmap */
                n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
                bmp_mem_size = rte_bitmap_get_memory_footprint(
                                                n_subport_pipe_queues);
                s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
                                        bmp_mem_size);
                if (s->bmp == NULL) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Subport bitmap init error\n", __func__);
                        ret = -EINVAL;
                        goto out;
                }

                for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
                        s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;

#ifdef RTE_SCHED_SUBPORT_TC_OV
                /* TC oversubscription */
                s->tc_ov_wm_min = port->mtu;
                s->tc_ov_period_id = 0;
                s->tc_ov = 0;
                s->tc_ov_n = 0;
                s->tc_ov_rate = 0;
#endif
        }

        {
                /* Update subport parameters from the subport profile table */
                profile = port->subport_profiles + subport_profile_id;

                s = port->subports[subport_id];

                s->tb_credits = profile->tb_size / 2;

                s->tc_time = port->time + profile->tc_period;

                for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                        if (s->qsize[i])
                                s->tc_credits[i] =
                                        profile->tc_credits_per_period[i];
                        else
                                profile->tc_credits_per_period[i] = 0;

#ifdef RTE_SCHED_SUBPORT_TC_OV
                s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
                                                        s->pipe_tc_be_rate_max);
                s->tc_ov_wm = s->tc_ov_wm_max;
#endif
                s->profile = subport_profile_id;

        }

        rte_sched_port_log_subport_profile(port, subport_profile_id);

        return 0;

out:
        rte_sched_free_memory(port, n_subports);

        return ret;
}

int
rte_sched_pipe_config(struct rte_sched_port *port,
        uint32_t subport_id,
        uint32_t pipe_id,
        int32_t pipe_profile)
{
        struct rte_sched_subport *s;
        struct rte_sched_subport_profile *sp;
        struct rte_sched_pipe *p;
        struct rte_sched_pipe_profile *params;
        uint32_t n_subports = subport_id + 1;
        uint32_t deactivate, profile, i;
        int ret;

        /* Check user parameters */
        profile = (uint32_t) pipe_profile;
        deactivate = (pipe_profile < 0);

        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter subport id\n", __func__);
                ret = -EINVAL;
                goto out;
        }

        s = port->subports[subport_id];
        if (pipe_id >= s->n_pipes_per_subport_enabled) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe id\n", __func__);
                ret = -EINVAL;
                goto out;
        }

        if (!deactivate && profile >= s->n_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe profile\n", __func__);
                ret = -EINVAL;
                goto out;
        }

        sp = port->subport_profiles + s->profile;
        /* Handle the case when pipe already has a valid configuration */
        p = s->pipe + pipe_id;
        if (p->tb_time) {
                params = s->pipe_profiles + p->profile;

                double subport_tc_be_rate =
                (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) sp->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                /* Unplug pipe from its subport */
                s->tc_ov_n -= params->tc_ov_weight;
                s->tc_ov_rate -= pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }

                /* Reset the pipe */
                memset(p, 0, sizeof(struct rte_sched_pipe));
        }

        if (deactivate)
                return 0;

        /* Apply the new pipe configuration */
        p->profile = profile;
        params = s->pipe_profiles + p->profile;

        /* Token Bucket (TB) */
        p->tb_time = port->time;
        p->tb_credits = params->tb_size / 2;

        /* Traffic Classes (TCs) */
        p->tc_time = port->time + params->tc_period;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (s->qsize[i])
                        p->tc_credits[i] = params->tc_credits_per_period[i];

        {
                /* Subport best effort tc oversubscription */
                double subport_tc_be_rate =
                (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) sp->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                s->tc_ov_n += params->tc_ov_weight;
                s->tc_ov_rate += pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }
                p->tc_ov_period_id = s->tc_ov_period_id;
                p->tc_ov_credits = s->tc_ov_wm;
        }

        return 0;

out:
        rte_sched_free_memory(port, n_subports);

        return ret;
}

int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_pipe_params *params,
        uint32_t *pipe_profile_id)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe_profile *pp;
        uint32_t i;
        int status;

        /* Port */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        /* Subport id must not exceed the max limit */
        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Pipe profiles must not exceed the max limit */
        if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Number of pipe profiles exceeds the max limit\n", __func__);
                return -EINVAL;
        }

        /* Pipe params */
        status = pipe_profile_check(params, port->rate, &s->qsize[0]);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Pipe profile check failed(%d)\n", __func__, status);
                return -EINVAL;
        }

        pp = &s->pipe_profiles[s->n_pipe_profiles];
        rte_sched_pipe_profile_convert(s, params, pp, port->rate);

        /* Pipe profile must not already exist */
1461         for (i = 0; i < s->n_pipe_profiles; i++)
1462                 if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
1463                         RTE_LOG(ERR, SCHED,
1464                                 "%s: Pipe profile exists\n", __func__);
1465                         return -EINVAL;
1466                 }
1467
1468         /* Pipe profile commit */
1469         *pipe_profile_id = s->n_pipe_profiles;
1470         s->n_pipe_profiles++;
1471
1472         if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
1473                 s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
1474
1475         rte_sched_port_log_pipe_profile(s, *pipe_profile_id);
1476
1477         return 0;
1478 }
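
/*
 * Usage sketch for the API above. The field values are illustrative and
 * the error handling is an assumption of the example, not part of the
 * library:
 *
 *	struct rte_sched_pipe_params pp = {
 *		.tb_rate = 305175, .tb_size = 1000000,
 *		.tc_rate = {305175, 305175, ...}, .tc_period = 40,
 *		.tc_ov_weight = 1,
 *		.wrr_weights = {1, 1, 1, 1},
 *	};
 *	uint32_t profile_id;
 *
 *	if (rte_sched_subport_pipe_profile_add(port, subport_id, &pp,
 *			&profile_id) != 0)
 *		rte_exit(EXIT_FAILURE, "Cannot add pipe profile\n");
 */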
1479
1480 int
1481 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
1482         struct rte_sched_subport_profile_params *params,
1483         uint32_t *subport_profile_id)
1484 {
1485         int status;
1486         uint32_t i;
1487         struct rte_sched_subport_profile *dst;
1488
1489         /* Port */
1490         if (port == NULL) {
1491                 RTE_LOG(ERR, SCHED,
1492                         "%s: Incorrect value for parameter port\n", __func__);
1493                 return -EINVAL;
1494         }
1495
1496         if (params == NULL) {
1497                 RTE_LOG(ERR, SCHED,
1498                         "%s: Incorrect value for parameter params\n", __func__);
1499                 return -EINVAL;
1500         }
1501
1502         if (subport_profile_id == NULL) {
1503                 RTE_LOG(ERR, SCHED,
1504                         "%s: Incorrect value for parameter subport_profile_id\n",
1505                         __func__);
1506                 return -EINVAL;
1507         }
1508
1509         /* Number of subport profiles must not exceed the max limit */
1510         if (port->n_subport_profiles >= port->n_max_subport_profiles) {
1511                 RTE_LOG(ERR, SCHED,
1512                         "%s: Number of subport profiles exceeds the max limit\n",
1513                         __func__);
1514                 return -EINVAL;
1515         }
1516
1517         dst = port->subport_profiles + port->n_subport_profiles;
1518
1519         status = subport_profile_check(params, port->rate);
1520         if (status != 0) {
1521                 RTE_LOG(ERR, SCHED,
1522                         "%s: Subport profile check failed (%d)\n", __func__, status);
1523                 return -EINVAL;
1524         }
1525
1526         rte_sched_subport_profile_convert(params, dst, port->rate);
1527
1528         /* Subport profile must not already exist */
1529         for (i = 0; i < port->n_subport_profiles; i++)
1530                 if (memcmp(port->subport_profiles + i,
1531                     dst, sizeof(*dst)) == 0) {
1532                         RTE_LOG(ERR, SCHED,
1533                                 "%s: Subport profile already exists\n", __func__);
1534                         return -EINVAL;
1535                 }
1536
1537         /* Subport profile commit */
1538         *subport_profile_id = port->n_subport_profiles;
1539         port->n_subport_profiles++;
1540
1541         rte_sched_port_log_subport_profile(port, *subport_profile_id);
1542
1543         return 0;
1544 }
1545
1546 static inline uint32_t
1547 rte_sched_port_qindex(struct rte_sched_port *port,
1548         uint32_t subport,
1549         uint32_t pipe,
1550         uint32_t traffic_class,
1551         uint32_t queue)
1552 {
1553         return ((subport & (port->n_subports_per_port - 1)) <<
1554                 (port->n_pipes_per_subport_log2 + 4)) |
1555                 ((pipe &
1556                 (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
1557                 ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
1558                 (RTE_SCHED_QUEUES_PER_PIPE - 1));
1559 }
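
/*
 * Resulting queue index layout (example with n_pipes_per_subport_log2 =
 * 10, i.e. 1024 pipes per subport):
 *
 *	bits [14..  ]: subport id
 *	bits [ 4..13]: pipe id within the subport
 *	bits [ 0.. 3]: queue id within the pipe (16 queues per pipe)
 *
 * Assuming the default queue mapping where the first best-effort queue
 * index within the pipe equals RTE_SCHED_TRAFFIC_CLASS_BE (12), subport
 * 2, pipe 5, best-effort queue 1 maps to (2 << 14) | (5 << 4) | (12 + 1).
 */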
1560
1561 void
1562 rte_sched_port_pkt_write(struct rte_sched_port *port,
1563                          struct rte_mbuf *pkt,
1564                          uint32_t subport, uint32_t pipe,
1565                          uint32_t traffic_class,
1566                          uint32_t queue, enum rte_color color)
1567 {
1568         uint32_t queue_id =
1569                 rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
1570
1571         rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
1572 }
1573
1574 void
1575 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1576                                   const struct rte_mbuf *pkt,
1577                                   uint32_t *subport, uint32_t *pipe,
1578                                   uint32_t *traffic_class, uint32_t *queue)
1579 {
1580         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1581
1582         *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1583         *pipe = (queue_id >> 4) &
1584                 (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1585         *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1586         *queue = rte_sched_port_tc_queue(port, queue_id);
1587 }
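
/*
 * The two helpers above round-trip through the mbuf sched field, e.g.
 * (sketch; ids assumed to be within the configured ranges):
 *
 *	uint32_t s, p, tc, q;
 *
 *	rte_sched_port_pkt_write(port, pkt, 2, 5,
 *		RTE_SCHED_TRAFFIC_CLASS_BE, 1, RTE_COLOR_GREEN);
 *	rte_sched_port_pkt_read_tree_path(port, pkt, &s, &p, &tc, &q);
 *
 * after which s == 2, p == 5, tc == RTE_SCHED_TRAFFIC_CLASS_BE and
 * q == 1.
 */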
1588
1589 enum rte_color
1590 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1591 {
1592         return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1593 }
1594
1595 int
1596 rte_sched_subport_read_stats(struct rte_sched_port *port,
1597                              uint32_t subport_id,
1598                              struct rte_sched_subport_stats *stats,
1599                              uint32_t *tc_ov)
1600 {
1601         struct rte_sched_subport *s;
1602
1603         /* Check user parameters */
1604         if (port == NULL) {
1605                 RTE_LOG(ERR, SCHED,
1606                         "%s: Incorrect value for parameter port\n", __func__);
1607                 return -EINVAL;
1608         }
1609
1610         if (subport_id >= port->n_subports_per_port) {
1611                 RTE_LOG(ERR, SCHED,
1612                         "%s: Incorrect value for subport id\n", __func__);
1613                 return -EINVAL;
1614         }
1615
1616         if (stats == NULL) {
1617                 RTE_LOG(ERR, SCHED,
1618                         "%s: Incorrect value for parameter stats\n", __func__);
1619                 return -EINVAL;
1620         }
1621
1622         if (tc_ov == NULL) {
1623                 RTE_LOG(ERR, SCHED,
1624                         "%s: Incorrect value for tc_ov\n", __func__);
1625                 return -EINVAL;
1626         }
1627
1628         s = port->subports[subport_id];
1629
1630         /* Copy subport stats and clear */
1631         memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1632         memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1633
1634         /* Subport TC oversubscription status */
1635         *tc_ov = s->tc_ov;
1636
1637         return 0;
1638 }
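
/*
 * Note that the read is destructive: the counters are zeroed once copied
 * out, so a periodic reader sees per-interval deltas. Sketch (the printf
 * consumer is an assumption of the example):
 *
 *	struct rte_sched_subport_stats st;
 *	uint32_t tc_ov;
 *
 *	if (rte_sched_subport_read_stats(port, subport_id, &st, &tc_ov) == 0)
 *		printf("BE pkts: %" PRIu64 ", oversubscription: %u\n",
 *			st.n_pkts_tc[RTE_SCHED_TRAFFIC_CLASS_BE], tc_ov);
 */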
1639
1640 int
1641 rte_sched_queue_read_stats(struct rte_sched_port *port,
1642         uint32_t queue_id,
1643         struct rte_sched_queue_stats *stats,
1644         uint16_t *qlen)
1645 {
1646         struct rte_sched_subport *s;
1647         struct rte_sched_queue *q;
1648         struct rte_sched_queue_extra *qe;
1649         uint32_t subport_id, subport_qshift, subport_qindex;
1650
1651         /* Check user parameters */
1652         if (port == NULL) {
1653                 RTE_LOG(ERR, SCHED,
1654                         "%s: Incorrect value for parameter port\n", __func__);
1655                 return -EINVAL;
1656         }
1657
1658         if (queue_id >= rte_sched_port_queues_per_port(port)) {
1659                 RTE_LOG(ERR, SCHED,
1660                         "%s: Incorrect value for queue id\n", __func__);
1661                 return -EINVAL;
1662         }
1663
1664         if (stats == NULL) {
1665                 RTE_LOG(ERR, SCHED,
1666                         "%s: Incorrect value for parameter stats\n", __func__);
1667                 return -EINVAL;
1668         }
1669
1670         if (qlen == NULL) {
1671                 RTE_LOG(ERR, SCHED,
1672                         "%s: Incorrect value for parameter qlen\n", __func__);
1673                 return -EINVAL;
1674         }
1675         subport_qshift = port->n_pipes_per_subport_log2 + 4;
1676         subport_id = (queue_id >> subport_qshift) & (port->n_subports_per_port - 1);
1677
1678         s = port->subports[subport_id];
1679         subport_qindex = ((1 << subport_qshift) - 1) & queue_id;
1680         q = s->queue + subport_qindex;
1681         qe = s->queue_extra + subport_qindex;
1682
1683         /* Copy queue stats and clear */
1684         memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1685         memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1686
1687         /* Queue length */
1688         *qlen = q->qw - q->qr;
1689
1690         return 0;
1691 }
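
/*
 * The occupancy computed above relies on unsigned 16-bit wraparound: qw
 * and qr increase monotonically and are only masked by (qsize - 1) when
 * used as array indices, so qw - qr stays correct across wrap. Example:
 * qw = 3, qr = 65533 gives (uint16_t)(3 - 65533) = 6 packets in flight.
 */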
1692
1693 #ifdef RTE_SCHED_DEBUG
1694
1695 static inline int
1696 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1697         uint32_t qindex)
1698 {
1699         struct rte_sched_queue *queue = subport->queue + qindex;
1700
1701         return queue->qr == queue->qw;
1702 }
1703
1704 #endif /* RTE_SCHED_DEBUG */
1705
1706 #ifdef RTE_SCHED_COLLECT_STATS
1707
1708 static inline void
1709 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1710         struct rte_sched_subport *subport,
1711         uint32_t qindex,
1712         struct rte_mbuf *pkt)
1713 {
1714         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1715         uint32_t pkt_len = pkt->pkt_len;
1716
1717         subport->stats.n_pkts_tc[tc_index] += 1;
1718         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1719 }
1720
1721 #ifdef RTE_SCHED_RED
1722 static inline void
1723 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1724         struct rte_sched_subport *subport,
1725         uint32_t qindex,
1726         struct rte_mbuf *pkt,
1727         uint32_t red)
1728 #else
1729 static inline void
1730 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1731         struct rte_sched_subport *subport,
1732         uint32_t qindex,
1733         struct rte_mbuf *pkt,
1734         __rte_unused uint32_t red)
1735 #endif
1736 {
1737         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1738         uint32_t pkt_len = pkt->pkt_len;
1739
1740         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1741         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1742 #ifdef RTE_SCHED_RED
1743         subport->stats.n_pkts_red_dropped[tc_index] += red;
1744 #endif
1745 }
1746
1747 static inline void
1748 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1749         uint32_t qindex,
1750         struct rte_mbuf *pkt)
1751 {
1752         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1753         uint32_t pkt_len = pkt->pkt_len;
1754
1755         qe->stats.n_pkts += 1;
1756         qe->stats.n_bytes += pkt_len;
1757 }
1758
1759 #ifdef RTE_SCHED_RED
1760 static inline void
1761 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1762         uint32_t qindex,
1763         struct rte_mbuf *pkt,
1764         uint32_t red)
1765 #else
1766 static inline void
1767 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1768         uint32_t qindex,
1769         struct rte_mbuf *pkt,
1770         __rte_unused uint32_t red)
1771 #endif
1772 {
1773         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1774         uint32_t pkt_len = pkt->pkt_len;
1775
1776         qe->stats.n_pkts_dropped += 1;
1777         qe->stats.n_bytes_dropped += pkt_len;
1778 #ifdef RTE_SCHED_RED
1779         qe->stats.n_pkts_red_dropped += red;
1780 #endif
1781 }
1782
1783 #endif /* RTE_SCHED_COLLECT_STATS */
1784
1785 #ifdef RTE_SCHED_RED
1786
1787 static inline int
1788 rte_sched_port_red_drop(struct rte_sched_port *port,
1789         struct rte_sched_subport *subport,
1790         struct rte_mbuf *pkt,
1791         uint32_t qindex,
1792         uint16_t qlen)
1793 {
1794         struct rte_sched_queue_extra *qe;
1795         struct rte_red_config *red_cfg;
1796         struct rte_red *red;
1797         uint32_t tc_index;
1798         enum rte_color color;
1799
1800         tc_index = rte_sched_port_pipe_tc(port, qindex);
1801         color = rte_sched_port_pkt_read_color(pkt);
1802         red_cfg = &subport->red_config[tc_index][color];
1803
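        /* min_th == max_th == 0 means RED is not configured for this TC/color */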
1804         if ((red_cfg->min_th | red_cfg->max_th) == 0)
1805                 return 0;
1806
1807         qe = subport->queue_extra + qindex;
1808         red = &qe->red;
1809
1810         return rte_red_enqueue(red_cfg, red, qlen, port->time);
1811 }
1812
1813 static inline void
1814 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
1815         struct rte_sched_subport *subport, uint32_t qindex)
1816 {
1817         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1818         struct rte_red *red = &qe->red;
1819
1820         rte_red_mark_queue_empty(red, port->time);
1821 }
1822
1823 #else
1824
1825 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
1826         struct rte_sched_subport *subport __rte_unused,
1827         struct rte_mbuf *pkt __rte_unused,
1828         uint32_t qindex __rte_unused,
1829         uint16_t qlen __rte_unused)
1830 {
1831         return 0;
1832 }
1833
1834 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
1835
1836 #endif /* RTE_SCHED_RED */
1837
1838 #ifdef RTE_SCHED_DEBUG
1839
1840 static inline void
1841 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1842                        uint64_t bmp_slab)
1843 {
1844         uint64_t mask;
1845         uint32_t i, panic;
1846
1847         if (bmp_slab == 0)
1848                 rte_panic("Empty slab at position %u\n", bmp_pos);
1849
1850         panic = 0;
1851         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1852                 if (mask & bmp_slab) {
1853                         if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1854                                 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1855                                 panic = 1;
1856                         }
1857                 }
1858         }
1859
1860         if (panic)
1861                 rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1862                         bmp_slab, bmp_pos);
1863 }
1864
1865 #endif /* RTE_SCHED_DEBUG */
1866
1867 static inline struct rte_sched_subport *
1868 rte_sched_port_subport(struct rte_sched_port *port,
1869         struct rte_mbuf *pkt)
1870 {
1871         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1872         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1873
1874         return port->subports[subport_id];
1875 }
1876
1877 static inline uint32_t
1878 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1879         struct rte_mbuf *pkt, uint32_t subport_qmask)
1880 {
1881         struct rte_sched_queue *q;
1882 #ifdef RTE_SCHED_COLLECT_STATS
1883         struct rte_sched_queue_extra *qe;
1884 #endif
1885         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1886         uint32_t subport_queue_id = subport_qmask & qindex;
1887
1888         q = subport->queue + subport_queue_id;
1889         rte_prefetch0(q);
1890 #ifdef RTE_SCHED_COLLECT_STATS
1891         qe = subport->queue_extra + subport_queue_id;
1892         rte_prefetch0(qe);
1893 #endif
1894
1895         return subport_queue_id;
1896 }
1897
1898 static inline void
1899 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
1900         struct rte_sched_subport *subport,
1901         uint32_t qindex,
1902         struct rte_mbuf **qbase)
1903 {
1904         struct rte_sched_queue *q;
1905         struct rte_mbuf **q_qw;
1906         uint16_t qsize;
1907
1908         q = subport->queue + qindex;
1909         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1910         q_qw = qbase + (q->qw & (qsize - 1));
1911
1912         rte_prefetch0(q_qw);
1913         rte_bitmap_prefetch0(subport->bmp, qindex);
1914 }
1915
1916 static inline int
1917 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
1918         struct rte_sched_subport *subport,
1919         uint32_t qindex,
1920         struct rte_mbuf **qbase,
1921         struct rte_mbuf *pkt)
1922 {
1923         struct rte_sched_queue *q;
1924         uint16_t qsize;
1925         uint16_t qlen;
1926
1927         q = subport->queue + qindex;
1928         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1929         qlen = q->qw - q->qr;
1930
1931         /* Drop the packet (and update drop stats) when the queue is full or RED drops it */
1932         if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
1933                      (qlen >= qsize))) {
1934                 rte_pktmbuf_free(pkt);
1935 #ifdef RTE_SCHED_COLLECT_STATS
1936                 rte_sched_port_update_subport_stats_on_drop(port, subport,
1937                         qindex, pkt, qlen < qsize);
1938                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
1939                         qlen < qsize);
1940 #endif
1941                 return 0;
1942         }
1943
1944         /* Enqueue packet */
1945         qbase[q->qw & (qsize - 1)] = pkt;
1946         q->qw++;
1947
1948         /* Activate queue in the subport bitmap */
1949         rte_bitmap_set(subport->bmp, qindex);
1950
1951         /* Statistics */
1952 #ifdef RTE_SCHED_COLLECT_STATS
1953         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
1954         rte_sched_port_update_queue_stats(subport, qindex, pkt);
1955 #endif
1956
1957         return 1;
1958 }
1959
1960
1961 /*
1962  * The enqueue function implements a 4-level pipeline with each stage
1963  * processing two different packets. The purpose of using a pipeline
1964  * is to hide the latency of prefetching the data structures. The
1965  * naming convention is presented in the diagram below:
1966  *
1967  *   p00  _______   p10  _______   p20  _______   p30  _______
1968  * ----->|       |----->|       |----->|       |----->|       |----->
1969  *       |   0   |      |   1   |      |   2   |      |   3   |
1970  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
1971  *   p01            p11            p21            p31
1972  *
1973  */
1974 int
1975 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
1976                        uint32_t n_pkts)
1977 {
1978         struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
1979                 *pkt30, *pkt31, *pkt_last;
1980         struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
1981                 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
1982         struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
1983                 *subport20, *subport21, *subport30, *subport31, *subport_last;
1984         uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
1985         uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
1986         uint32_t subport_qmask;
1987         uint32_t result, i;
1988
1989         result = 0;
1990         subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
1991
1992         /*
1993          * Fewer than 6 input packets available, which is not enough to
1994          * feed the pipeline
1995          */
1996         if (unlikely(n_pkts < 6)) {
1997                 struct rte_sched_subport *subports[5];
1998                 struct rte_mbuf **q_base[5];
1999                 uint32_t q[5];
2000
2001                 /* Prefetch the mbuf structure of each packet */
2002                 for (i = 0; i < n_pkts; i++)
2003                         rte_prefetch0(pkts[i]);
2004
2005                 /* Prefetch the subport structure for each packet */
2006                 for (i = 0; i < n_pkts; i++)
2007                         subports[i] = rte_sched_port_subport(port, pkts[i]);
2008
2009                 /* Prefetch the queue structure for each queue */
2010                 for (i = 0; i < n_pkts; i++)
2011                         q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
2012                                         pkts[i], subport_qmask);
2013
2014                 /* Prefetch the write pointer location of each queue */
2015                 for (i = 0; i < n_pkts; i++) {
2016                         q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
2017                         rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
2018                                 q[i], q_base[i]);
2019                 }
2020
2021                 /* Write each packet to its queue */
2022                 for (i = 0; i < n_pkts; i++)
2023                         result += rte_sched_port_enqueue_qwa(port, subports[i],
2024                                                 q[i], q_base[i], pkts[i]);
2025
2026                 return result;
2027         }
2028
2029         /* Feed the first 3 stages of the pipeline (6 packets needed) */
2030         pkt20 = pkts[0];
2031         pkt21 = pkts[1];
2032         rte_prefetch0(pkt20);
2033         rte_prefetch0(pkt21);
2034
2035         pkt10 = pkts[2];
2036         pkt11 = pkts[3];
2037         rte_prefetch0(pkt10);
2038         rte_prefetch0(pkt11);
2039
2040         subport20 = rte_sched_port_subport(port, pkt20);
2041         subport21 = rte_sched_port_subport(port, pkt21);
2042         q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
2043                         pkt20, subport_qmask);
2044         q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
2045                         pkt21, subport_qmask);
2046
2047         pkt00 = pkts[4];
2048         pkt01 = pkts[5];
2049         rte_prefetch0(pkt00);
2050         rte_prefetch0(pkt01);
2051
2052         subport10 = rte_sched_port_subport(port, pkt10);
2053         subport11 = rte_sched_port_subport(port, pkt11);
2054         q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2055                         pkt10, subport_qmask);
2056         q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2057                         pkt11, subport_qmask);
2058
2059         q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2060         q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2061         rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2062         rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2063
2064         /* Run the pipeline */
2065         for (i = 6; i < (n_pkts & (~1)); i += 2) {
2066                 /* Propagate stage inputs */
2067                 pkt30 = pkt20;
2068                 pkt31 = pkt21;
2069                 pkt20 = pkt10;
2070                 pkt21 = pkt11;
2071                 pkt10 = pkt00;
2072                 pkt11 = pkt01;
2073                 q30 = q20;
2074                 q31 = q21;
2075                 q20 = q10;
2076                 q21 = q11;
2077                 subport30 = subport20;
2078                 subport31 = subport21;
2079                 subport20 = subport10;
2080                 subport21 = subport11;
2081                 q30_base = q20_base;
2082                 q31_base = q21_base;
2083
2084                 /* Stage 0: Get packets in */
2085                 pkt00 = pkts[i];
2086                 pkt01 = pkts[i + 1];
2087                 rte_prefetch0(pkt00);
2088                 rte_prefetch0(pkt01);
2089
2090                 /* Stage 1: Prefetch subport and the queue structures storing the queue pointers */
2091                 subport10 = rte_sched_port_subport(port, pkt10);
2092                 subport11 = rte_sched_port_subport(port, pkt11);
2093                 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2094                                 pkt10, subport_qmask);
2095                 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2096                                 pkt11, subport_qmask);
2097
2098                 /* Stage 2: Prefetch queue write location */
2099                 q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2100                 q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2101                 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2102                 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2103
2104                 /* Stage 3: Write packet to queue and activate queue */
2105                 r30 = rte_sched_port_enqueue_qwa(port, subport30,
2106                                 q30, q30_base, pkt30);
2107                 r31 = rte_sched_port_enqueue_qwa(port, subport31,
2108                                 q31, q31_base, pkt31);
2109                 result += r30 + r31;
2110         }
2111
2112         /*
2113          * Drain the pipeline (exactly 6 packets).
2114          * Handle the last packet in the case
2115          * of an odd number of input packets.
2116          */
2117         pkt_last = pkts[n_pkts - 1];
2118         rte_prefetch0(pkt_last);
2119
2120         subport00 = rte_sched_port_subport(port, pkt00);
2121         subport01 = rte_sched_port_subport(port, pkt01);
2122         q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
2123                         pkt00, subport_qmask);
2124         q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
2125                         pkt01, subport_qmask);
2126
2127         q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
2128         q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
2129         rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
2130         rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
2131
2132         r20 = rte_sched_port_enqueue_qwa(port, subport20,
2133                         q20, q20_base, pkt20);
2134         r21 = rte_sched_port_enqueue_qwa(port, subport21,
2135                         q21, q21_base, pkt21);
2136         result += r20 + r21;
2137
2138         subport_last = rte_sched_port_subport(port, pkt_last);
2139         q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
2140                                 pkt_last, subport_qmask);
2141
2142         q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
2143         q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
2144         rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
2145         rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
2146
2147         r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
2148                         q10_base, pkt10);
2149         r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
2150                         q11_base, pkt11);
2151         result += r10 + r11;
2152
2153         q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
2154         rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
2155                 q_last, q_last_base);
2156
2157         r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
2158                         q00_base, pkt00);
2159         r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
2160                         q01_base, pkt01);
2161         result += r00 + r01;
2162
2163         if (n_pkts & 1) {
2164                 r_last = rte_sched_port_enqueue_qwa(port, subport_last,
2165                                         q_last, q_last_base, pkt_last);
2166                 result += r_last;
2167         }
2168
2169         return result;
2170 }
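
/*
 * Typical enqueue-side call pattern (sketch; rte_eth_rx_burst as the
 * packet source and the pipe()/tc()/queue() classification helpers are
 * assumptions of the example):
 *
 *	struct rte_mbuf *burst[64];
 *	uint16_t n = rte_eth_rx_burst(port_id, rx_queue_id, burst, 64);
 *	uint16_t i;
 *
 *	for (i = 0; i < n; i++)
 *		rte_sched_port_pkt_write(port, burst[i], subport_id,
 *			pipe(burst[i]), tc(burst[i]), queue(burst[i]),
 *			RTE_COLOR_GREEN);
 *	rte_sched_port_enqueue(port, burst, n);
 *
 * Packets refused admission (queue full or RED drop) are freed
 * internally; the return value counts the packets actually enqueued.
 */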
2171
2172 #ifndef RTE_SCHED_SUBPORT_TC_OV
2173
2174 static inline void
2175 grinder_credits_update(struct rte_sched_port *port,
2176         struct rte_sched_subport *subport, uint32_t pos)
2177 {
2178         struct rte_sched_grinder *grinder = subport->grinder + pos;
2179         struct rte_sched_pipe *pipe = grinder->pipe;
2180         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2181         struct rte_sched_subport_profile *sp = grinder->subport_params;
2182         uint64_t n_periods;
2183         uint32_t i;
2184
2185         /* Subport TB */
2186         n_periods = (port->time - subport->tb_time) / sp->tb_period;
2187         subport->tb_credits += n_periods * sp->tb_credits_per_period;
2188         subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2189         subport->tb_time += n_periods * sp->tb_period;
2190
2191         /* Pipe TB */
2192         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2193         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2194         pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2195         pipe->tb_time += n_periods * params->tb_period;
2196
2197         /* Subport TCs */
2198         if (unlikely(port->time >= subport->tc_time)) {
2199                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2200                         subport->tc_credits[i] = sp->tc_credits_per_period[i];
2201
2202                 subport->tc_time = port->time + sp->tc_period;
2203         }
2204
2205         /* Pipe TCs */
2206         if (unlikely(port->time >= pipe->tc_time)) {
2207                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2208                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2209
2210                 pipe->tc_time = port->time + params->tc_period;
2211         }
2212 }
2213
2214 #else
2215
2216 static inline uint64_t
2217 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2218         struct rte_sched_subport *subport, uint32_t pos)
2219 {
2220         struct rte_sched_grinder *grinder = subport->grinder + pos;
2221         struct rte_sched_subport_profile *sp = grinder->subport_params;
2222         uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2223         uint64_t tc_consumption = 0, tc_ov_consumption_max;
2224         uint64_t tc_ov_wm = subport->tc_ov_wm;
2225         uint32_t i;
2226
2227         if (subport->tc_ov == 0)
2228                 return subport->tc_ov_wm_max;
2229
2230         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2231                 tc_ov_consumption[i] = sp->tc_credits_per_period[i] -
2232                         subport->tc_credits[i];
2233                 tc_consumption += tc_ov_consumption[i];
2234         }
2235
2236         tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2237                 sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2238                 subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2239
2240         tc_ov_consumption_max =
2241                 sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2242                 tc_consumption;
2243
2244         if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2245                 (tc_ov_consumption_max - port->mtu)) {
2246                 tc_ov_wm  -= tc_ov_wm >> 7;
2247                 if (tc_ov_wm < subport->tc_ov_wm_min)
2248                         tc_ov_wm = subport->tc_ov_wm_min;
2249
2250                 return tc_ov_wm;
2251         }
2252
2253         tc_ov_wm += (tc_ov_wm >> 7) + 1;
2254         if (tc_ov_wm > subport->tc_ov_wm_max)
2255                 tc_ov_wm = subport->tc_ov_wm_max;
2256
2257         return tc_ov_wm;
2258 }
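
/*
 * The watermark above adapts in steps of roughly 1/128th of its current
 * value: e.g. with tc_ov_wm = 12800, congestion shrinks it by
 * 12800 >> 7 = 100 down to 12700, while spare best-effort capacity grows
 * it by (12800 >> 7) + 1 = 101, both results clamped to the
 * [tc_ov_wm_min, tc_ov_wm_max] range.
 */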
2259
2260 static inline void
2261 grinder_credits_update(struct rte_sched_port *port,
2262         struct rte_sched_subport *subport, uint32_t pos)
2263 {
2264         struct rte_sched_grinder *grinder = subport->grinder + pos;
2265         struct rte_sched_pipe *pipe = grinder->pipe;
2266         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2267         struct rte_sched_subport_profile *sp = grinder->subport_params;
2268         uint64_t n_periods;
2269         uint32_t i;
2270
2271         /* Subport TB */
2272         n_periods = (port->time - subport->tb_time) / sp->tb_period;
2273         subport->tb_credits += n_periods * sp->tb_credits_per_period;
2274         subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2275         subport->tb_time += n_periods * sp->tb_period;
2276
2277         /* Pipe TB */
2278         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2279         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2280         pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2281         pipe->tb_time += n_periods * params->tb_period;
2282
2283         /* Subport TCs */
2284         if (unlikely(port->time >= subport->tc_time)) {
2285                 subport->tc_ov_wm =
2286                         grinder_tc_ov_credits_update(port, subport, pos);
2287
2288                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2289                         subport->tc_credits[i] = sp->tc_credits_per_period[i];
2290
2291                 subport->tc_time = port->time + sp->tc_period;
2292                 subport->tc_ov_period_id++;
2293         }
2294
2295         /* Pipe TCs */
2296         if (unlikely(port->time >= pipe->tc_time)) {
2297                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2298                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2299                 pipe->tc_time = port->time + params->tc_period;
2300         }
2301
2302         /* Pipe TCs - Oversubscription */
2303         if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2304                 pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2305
2306                 pipe->tc_ov_period_id = subport->tc_ov_period_id;
2307         }
2308 }
2309
2310 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2311
2312
2313 #ifndef RTE_SCHED_SUBPORT_TC_OV
2314
2315 static inline int
2316 grinder_credits_check(struct rte_sched_port *port,
2317         struct rte_sched_subport *subport, uint32_t pos)
2318 {
2319         struct rte_sched_grinder *grinder = subport->grinder + pos;
2320         struct rte_sched_pipe *pipe = grinder->pipe;
2321         struct rte_mbuf *pkt = grinder->pkt;
2322         uint32_t tc_index = grinder->tc_index;
2323         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2324         uint64_t subport_tb_credits = subport->tb_credits;
2325         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2326         uint64_t pipe_tb_credits = pipe->tb_credits;
2327         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2328         int enough_credits;
2329
2330         /* Check subport and pipe credits */
2331         enough_credits = (pkt_len <= subport_tb_credits) &&
2332                 (pkt_len <= subport_tc_credits) &&
2333                 (pkt_len <= pipe_tb_credits) &&
2334                 (pkt_len <= pipe_tc_credits);
2335
2336         if (!enough_credits)
2337                 return 0;
2338
2339         /* Update subport and pipe credits */
2340         subport->tb_credits -= pkt_len;
2341         subport->tc_credits[tc_index] -= pkt_len;
2342         pipe->tb_credits -= pkt_len;
2343         pipe->tc_credits[tc_index] -= pkt_len;
2344
2345         return 1;
2346 }
2347
2348 #else
2349
2350 static inline int
2351 grinder_credits_check(struct rte_sched_port *port,
2352         struct rte_sched_subport *subport, uint32_t pos)
2353 {
2354         struct rte_sched_grinder *grinder = subport->grinder + pos;
2355         struct rte_sched_pipe *pipe = grinder->pipe;
2356         struct rte_mbuf *pkt = grinder->pkt;
2357         uint32_t tc_index = grinder->tc_index;
2358         uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2359         uint64_t subport_tb_credits = subport->tb_credits;
2360         uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2361         uint64_t pipe_tb_credits = pipe->tb_credits;
2362         uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2363         uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2364         uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2365         uint64_t pipe_tc_ov_credits;
2366         uint32_t i;
2367         int enough_credits;
2368
2369         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2370                 pipe_tc_ov_mask1[i] = ~0LLU;
2371
2372         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2373         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
2374         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2375
2376         /* Check pipe and subport credits */
2377         enough_credits = (pkt_len <= subport_tb_credits) &&
2378                 (pkt_len <= subport_tc_credits) &&
2379                 (pkt_len <= pipe_tb_credits) &&
2380                 (pkt_len <= pipe_tc_credits) &&
2381                 (pkt_len <= pipe_tc_ov_credits);
2382
2383         if (!enough_credits)
2384                 return 0;
2385
2386         /* Update pipe and subport credits */
2387         subport->tb_credits -= pkt_len;
2388         subport->tc_credits[tc_index] -= pkt_len;
2389         pipe->tb_credits -= pkt_len;
2390         pipe->tc_credits[tc_index] -= pkt_len;
2391         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2392
2393         return 1;
2394 }
2395
2396 #endif /* RTE_SCHED_SUBPORT_TC_OV */
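
/*
 * The two mask arrays in the check above make the oversubscription logic
 * branchless. For any traffic class other than RTE_SCHED_TRAFFIC_CLASS_BE:
 *
 *	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index] = ~0LLU,
 *		so the fifth credit comparison always passes, and
 *	pipe_tc_ov_mask2[tc_index] = 0,
 *		so pipe->tc_ov_credits -= 0 & pkt_len is a no-op.
 *
 * Only the best-effort class checks the real tc_ov_credits and debits
 * them by pkt_len.
 */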
2397
2398
2399 static inline int
2400 grinder_schedule(struct rte_sched_port *port,
2401         struct rte_sched_subport *subport, uint32_t pos)
2402 {
2403         struct rte_sched_grinder *grinder = subport->grinder + pos;
2404         struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2405         struct rte_mbuf *pkt = grinder->pkt;
2406         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2407         uint32_t be_tc_active;
2408
2409         if (!grinder_credits_check(port, subport, pos))
2410                 return 0;
2411
2412         /* Advance port time */
2413         port->time += pkt_len;
2414
2415         /* Send packet */
2416         port->pkts_out[port->n_pkts_out++] = pkt;
2417         queue->qr++;
2418
2419         be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2420         grinder->wrr_tokens[grinder->qpos] +=
2421                 (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2422
2423         if (queue->qr == queue->qw) {
2424                 uint32_t qindex = grinder->qindex[grinder->qpos];
2425
2426                 rte_bitmap_clear(subport->bmp, qindex);
2427                 grinder->qmask &= ~(1 << grinder->qpos);
2428                 if (be_tc_active)
2429                         grinder->wrr_mask[grinder->qpos] = 0;
2430                 rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
2431         }
2432
2433         /* Reset pipe loop detection */
2434         subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2435         grinder->productive = 1;
2436
2437         return 1;
2438 }
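
/*
 * be_tc_active above is an all-ones/all-zeroes word, so the WRR token
 * update is branchless: for the best-effort TC it adds
 * pkt_len * wrr_cost[qpos], while for the higher-priority TCs the AND
 * with zero turns the addition into a no-op.
 */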
2439
2440 #ifdef SCHED_VECTOR_SSE4
2441
2442 static inline int
2443 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2444 {
2445         __m128i index = _mm_set1_epi32(base_pipe);
2446         __m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
2447         __m128i res = _mm_cmpeq_epi32(pipes, index);
2448
2449         pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
2450         pipes = _mm_cmpeq_epi32(pipes, index);
2451         res = _mm_or_si128(res, pipes);
2452
2453         if (_mm_testz_si128(res, res))
2454                 return 0;
2455
2456         return 1;
2457 }
2458
2459 #elif defined(SCHED_VECTOR_NEON)
2460
2461 static inline int
2462 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2463 {
2464         uint32x4_t index, pipes;
2465         uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;
2466
2467         index = vmovq_n_u32(base_pipe);
2468         pipes = vld1q_u32(pos);
2469         if (!vminvq_u32(veorq_u32(pipes, index)))
2470                 return 1;
2471
2472         pipes = vld1q_u32(pos + 4);
2473         if (!vminvq_u32(veorq_u32(pipes, index)))
2474                 return 1;
2475
2476         return 0;
2477 }
2478
2479 #else
2480
2481 static inline int
2482 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2483 {
2484         uint32_t i;
2485
2486         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2487                 if (subport->grinder_base_bmp_pos[i] == base_pipe)
2488                         return 1;
2489         }
2490
2491         return 0;
2492 }
2493
2494 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2495
2496 static inline void
2497 grinder_pcache_populate(struct rte_sched_subport *subport,
2498         uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2499 {
2500         struct rte_sched_grinder *grinder = subport->grinder + pos;
2501         uint16_t w[4];
2502
2503         grinder->pcache_w = 0;
2504         grinder->pcache_r = 0;
2505
2506         w[0] = (uint16_t) bmp_slab;
2507         w[1] = (uint16_t) (bmp_slab >> 16);
2508         w[2] = (uint16_t) (bmp_slab >> 32);
2509         w[3] = (uint16_t) (bmp_slab >> 48);
2510
2511         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2512         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2513         grinder->pcache_w += (w[0] != 0);
2514
2515         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2516         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2517         grinder->pcache_w += (w[1] != 0);
2518
2519         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2520         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2521         grinder->pcache_w += (w[2] != 0);
2522
2523         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2524         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2525         grinder->pcache_w += (w[3] != 0);
2526 }
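
/*
 * Example: bmp_slab = 0x0000000100010003 at bmp_pos = 128 caches three
 * non-empty 16-queue groups: (0x0003, 128), (0x0001, 144) and
 * (0x0001, 160); the all-zero top word is skipped because pcache_w only
 * advances when the extracted 16-bit word is non-zero.
 */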
2527
2528 static inline void
2529 grinder_tccache_populate(struct rte_sched_subport *subport,
2530         uint32_t pos, uint32_t qindex, uint16_t qmask)
2531 {
2532         struct rte_sched_grinder *grinder = subport->grinder + pos;
2533         uint8_t b, i;
2534
2535         grinder->tccache_w = 0;
2536         grinder->tccache_r = 0;
2537
2538         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2539                 b = (uint8_t) ((qmask >> i) & 0x1);
2540                 grinder->tccache_qmask[grinder->tccache_w] = b;
2541                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2542                 grinder->tccache_w += (b != 0);
2543         }
2544
2545         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2546         grinder->tccache_qmask[grinder->tccache_w] = b;
2547         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2548                 RTE_SCHED_TRAFFIC_CLASS_BE;
2549         grinder->tccache_w += (b != 0);
2550 }
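
/*
 * Example: qmask = 0x1031 (queues 0, 4, 5 and the first best-effort
 * queue active) produces the tccache entries (1, qindex), (1, qindex + 4),
 * (1, qindex + 5) and (0x1, qindex + 12); empty traffic classes are
 * skipped because tccache_w only advances when the extracted bit field
 * is non-zero.
 */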
2551
2552 static inline int
2553 grinder_next_tc(struct rte_sched_port *port,
2554         struct rte_sched_subport *subport, uint32_t pos)
2555 {
2556         struct rte_sched_grinder *grinder = subport->grinder + pos;
2557         struct rte_mbuf **qbase;
2558         uint32_t qindex;
2559         uint16_t qsize;
2560
2561         if (grinder->tccache_r == grinder->tccache_w)
2562                 return 0;
2563
2564         qindex = grinder->tccache_qindex[grinder->tccache_r];
2565         qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2566         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2567
2568         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2569         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2570         grinder->qsize = qsize;
2571
2572         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2573                 grinder->queue[0] = subport->queue + qindex;
2574                 grinder->qbase[0] = qbase;
2575                 grinder->qindex[0] = qindex;
2576                 grinder->tccache_r++;
2577
2578                 return 1;
2579         }
2580
2581         grinder->queue[0] = subport->queue + qindex;
2582         grinder->queue[1] = subport->queue + qindex + 1;
2583         grinder->queue[2] = subport->queue + qindex + 2;
2584         grinder->queue[3] = subport->queue + qindex + 3;
2585
2586         grinder->qbase[0] = qbase;
2587         grinder->qbase[1] = qbase + qsize;
2588         grinder->qbase[2] = qbase + 2 * qsize;
2589         grinder->qbase[3] = qbase + 3 * qsize;
2590
2591         grinder->qindex[0] = qindex;
2592         grinder->qindex[1] = qindex + 1;
2593         grinder->qindex[2] = qindex + 2;
2594         grinder->qindex[3] = qindex + 3;
2595
2596         grinder->tccache_r++;
2597         return 1;
2598 }
2599
2600 static inline int
2601 grinder_next_pipe(struct rte_sched_port *port,
2602         struct rte_sched_subport *subport, uint32_t pos)
2603 {
2604         struct rte_sched_grinder *grinder = subport->grinder + pos;
2605         uint32_t pipe_qindex;
2606         uint16_t pipe_qmask;
2607
2608         if (grinder->pcache_r < grinder->pcache_w) {
2609                 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2610                 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2611                 grinder->pcache_r++;
2612         } else {
2613                 uint64_t bmp_slab = 0;
2614                 uint32_t bmp_pos = 0;
2615
2616                 /* Get another non-empty pipe group */
2617                 if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2618                         return 0;
2619
2620 #ifdef RTE_SCHED_DEBUG
2621                 debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2622 #endif
2623
2624                 /* Return if pipe group already in one of the other grinders */
2625                 subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2626                 if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2627                         return 0;
2628
2629                 subport->grinder_base_bmp_pos[pos] = bmp_pos;
2630
2631                 /* Install new pipe group into grinder's pipe cache */
2632                 grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2633
2634                 pipe_qmask = grinder->pcache_qmask[0];
2635                 pipe_qindex = grinder->pcache_qindex[0];
2636                 grinder->pcache_r = 1;
2637         }
2638
2639         /* Install new pipe in the grinder */
2640         grinder->pindex = pipe_qindex >> 4;
2641         grinder->subport = subport;
2642         grinder->pipe = subport->pipe + grinder->pindex;
2643         grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2644         grinder->productive = 0;
2645
2646         grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2647         grinder_next_tc(port, subport, pos);
2648
2649         /* Check for pipe exhaustion */
2650         if (grinder->pindex == subport->pipe_loop) {
2651                 subport->pipe_exhaustion = 1;
2652                 subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2653         }
2654
2655         return 1;
2656 }
2657
2658
2659 static inline void
2660 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2661 {
2662         struct rte_sched_grinder *grinder = subport->grinder + pos;
2663         struct rte_sched_pipe *pipe = grinder->pipe;
2664         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2665         uint32_t qmask = grinder->qmask;
2666
2667         grinder->wrr_tokens[0] =
2668                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2669         grinder->wrr_tokens[1] =
2670                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2671         grinder->wrr_tokens[2] =
2672                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2673         grinder->wrr_tokens[3] =
2674                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2675
2676         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2677         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2678         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2679         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2680
2681         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2682         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2683         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2684         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2685 }
2686
2687 static inline void
2688 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2689 {
2690         struct rte_sched_grinder *grinder = subport->grinder + pos;
2691         struct rte_sched_pipe *pipe = grinder->pipe;
2692
2693         pipe->wrr_tokens[0] =
2694                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2695                                 RTE_SCHED_WRR_SHIFT;
2696         pipe->wrr_tokens[1] =
2697                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2698                                 RTE_SCHED_WRR_SHIFT;
2699         pipe->wrr_tokens[2] =
2700                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2701                                 RTE_SCHED_WRR_SHIFT;
2702         pipe->wrr_tokens[3] =
2703                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2704                                 RTE_SCHED_WRR_SHIFT;
2705 }
2706
2707 static inline void
2708 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2709 {
2710         struct rte_sched_grinder *grinder = subport->grinder + pos;
2711         uint16_t wrr_tokens_min;
2712
2713         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2714         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2715         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2716         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2717
2718         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2719         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2720
2721         grinder->wrr_tokens[0] -= wrr_tokens_min;
2722         grinder->wrr_tokens[1] -= wrr_tokens_min;
2723         grinder->wrr_tokens[2] -= wrr_tokens_min;
2724         grinder->wrr_tokens[3] -= wrr_tokens_min;
2725 }
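
/*
 * Worked example (illustrative): with wrr_mask all ones and
 * wrr_tokens = {40, 10, 30, 20}, rte_min_pos_4_u16() selects qpos = 1
 * and the lanes renormalize to {30, 0, 20, 10}, so the queue with the
 * least accumulated cost is served next; inactive queues are parked at
 * 0xFFFF by the OR with ~wrr_mask and can never win the minimum.
 */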
2726
2727
2728 #define grinder_evict(subport, pos)
2729
2730 static inline void
2731 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2732 {
2733         struct rte_sched_grinder *grinder = subport->grinder + pos;
2734
2735         rte_prefetch0(grinder->pipe);
2736         rte_prefetch0(grinder->queue[0]);
2737 }
2738
2739 static inline void
2740 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2741 {
2742         struct rte_sched_grinder *grinder = subport->grinder + pos;
2743         uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2744
2745         qsize = grinder->qsize;
2746         grinder->qpos = 0;
2747
2748         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2749                 qr[0] = grinder->queue[0]->qr & (qsize - 1);
2750
2751                 rte_prefetch0(grinder->qbase[0] + qr[0]);
2752                 return;
2753         }
2754
2755         qr[0] = grinder->queue[0]->qr & (qsize - 1);
2756         qr[1] = grinder->queue[1]->qr & (qsize - 1);
2757         qr[2] = grinder->queue[2]->qr & (qsize - 1);
2758         qr[3] = grinder->queue[3]->qr & (qsize - 1);
2759
2760         rte_prefetch0(grinder->qbase[0] + qr[0]);
2761         rte_prefetch0(grinder->qbase[1] + qr[1]);
2762
2763         grinder_wrr_load(subport, pos);
2764         grinder_wrr(subport, pos);
2765
2766         rte_prefetch0(grinder->qbase[2] + qr[2]);
2767         rte_prefetch0(grinder->qbase[3] + qr[3]);
2768 }
2769
2770 static inline void
2771 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2772 {
2773         struct rte_sched_grinder *grinder = subport->grinder + pos;
2774         uint32_t qpos = grinder->qpos;
2775         struct rte_mbuf **qbase = grinder->qbase[qpos];
2776         uint16_t qsize = grinder->qsize;
2777         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2778
2779         grinder->pkt = qbase[qr];
2780         rte_prefetch0(grinder->pkt);
2781
2782         if (unlikely((qr & 0x7) == 7)) {
2783                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2784
2785                 rte_prefetch0(qbase + qr_next);
2786         }
2787 }
2788
2789 static inline uint32_t
2790 grinder_handle(struct rte_sched_port *port,
2791         struct rte_sched_subport *subport, uint32_t pos)
2792 {
2793         struct rte_sched_grinder *grinder = subport->grinder + pos;
2794
2795         switch (grinder->state) {
2796         case e_GRINDER_PREFETCH_PIPE:
2797         {
2798                 if (grinder_next_pipe(port, subport, pos)) {
2799                         grinder_prefetch_pipe(subport, pos);
2800                         subport->busy_grinders++;
2801
2802                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2803                         return 0;
2804                 }
2805
2806                 return 0;
2807         }
2808
2809         case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2810         {
2811                 struct rte_sched_pipe *pipe = grinder->pipe;
2812
2813                 grinder->pipe_params = subport->pipe_profiles + pipe->profile;
2814                 grinder->subport_params = port->subport_profiles +
2815                                                 subport->profile;
2816
2817                 grinder_prefetch_tc_queue_arrays(subport, pos);
2818                 grinder_credits_update(port, subport, pos);
2819
2820                 grinder->state = e_GRINDER_PREFETCH_MBUF;
2821                 return 0;
2822         }
2823
2824         case e_GRINDER_PREFETCH_MBUF:
2825         {
2826                 grinder_prefetch_mbuf(subport, pos);
2827
2828                 grinder->state = e_GRINDER_READ_MBUF;
2829                 return 0;
2830         }
2831
2832         case e_GRINDER_READ_MBUF:
2833         {
2834                 uint32_t wrr_active, result = 0;
2835
2836                 result = grinder_schedule(port, subport, pos);
2837
2838                 wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2839
2840                 /* Look for next packet within the same TC */
2841                 if (result && grinder->qmask) {
2842                         if (wrr_active)
2843                                 grinder_wrr(subport, pos);
2844
2845                         grinder_prefetch_mbuf(subport, pos);
2846
2847                         return 1;
2848                 }
2849
2850                 if (wrr_active)
2851                         grinder_wrr_store(subport, pos);
2852
2853                 /* Look for another active TC within same pipe */
2854                 if (grinder_next_tc(port, subport, pos)) {
2855                         grinder_prefetch_tc_queue_arrays(subport, pos);
2856
2857                         grinder->state = e_GRINDER_PREFETCH_MBUF;
2858                         return result;
2859                 }
2860
2861                 if (grinder->productive == 0 &&
2862                     subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
2863                         subport->pipe_loop = grinder->pindex;
2864
2865                 grinder_evict(subport, pos);
2866
2867                 /* Look for another active pipe */
2868                 if (grinder_next_pipe(port, subport, pos)) {
2869                         grinder_prefetch_pipe(subport, pos);
2870
2871                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2872                         return result;
2873                 }
2874
2875                 /* No active pipe found */
2876                 subport->busy_grinders--;
2877
2878                 grinder->state = e_GRINDER_PREFETCH_PIPE;
2879                 return result;
2880         }
2881
2882         default:
2883                 rte_panic("Algorithmic error (invalid state)\n");
2884                 return 0;
2885         }
2886 }
2887
2888 static inline void
2889 rte_sched_port_time_resync(struct rte_sched_port *port)
2890 {
2891         uint64_t cycles = rte_get_tsc_cycles();
2892         uint64_t cycles_diff;
2893         uint64_t bytes_diff;
2894         uint32_t i;
2895
2896         if (cycles < port->time_cpu_cycles)
2897                 port->time_cpu_cycles = 0;
2898
2899         cycles_diff = cycles - port->time_cpu_cycles;
2900         /* Compute elapsed time in bytes */
2901         bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2902                                            port->inv_cycles_per_byte);
2903
2904         /* Advance port time */
2905         port->time_cpu_cycles +=
2906                 (bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT;
2907         port->time_cpu_bytes += bytes_diff;
2908         if (port->time < port->time_cpu_bytes)
2909                 port->time = port->time_cpu_bytes;
2910
2911         /* Reset pipe loop detection */
2912         for (i = 0; i < port->n_subports_per_port; i++)
2913                 port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2914 }
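
/*
 * Sketch of the fixed-point conversion above (numbers are illustrative):
 * with a 2.5 GHz TSC and a 10 Gbps port (1.25e9 bytes/s),
 * cycles_per_byte = 2, stored as 2 << RTE_SCHED_TIME_SHIFT = 512; a
 * cycles_diff of 1000 then yields bytes_diff = (1000 << 8) / 512 = 500
 * bytes of elapsed port time. The reset of time_cpu_cycles when the raw
 * TSC reads lower guards against counter wrap.
 */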
2915
2916 static inline int
2917 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2918 {
2919         int exceptions;
2920
2921         /* Check if any exception flag is set */
2922         exceptions = (second_pass && subport->busy_grinders == 0) ||
2923                 (subport->pipe_exhaustion == 1);
2924
2925         /* Clear exception flags */
2926         subport->pipe_exhaustion = 0;
2927
2928         return exceptions;
2929 }
2930
2931 int
2932 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
2933 {
2934         struct rte_sched_subport *subport;
2935         uint32_t subport_id = port->subport_id;
2936         uint32_t i, n_subports = 0, count;
2937
2938         port->pkts_out = pkts;
2939         port->n_pkts_out = 0;
2940
2941         rte_sched_port_time_resync(port);
2942
2943         /* Take each grinder one step further */
2944         for (i = 0, count = 0; ; i++) {
2945                 subport = port->subports[subport_id];
2946
2947                 count += grinder_handle(port, subport,
2948                                 i & (RTE_SCHED_PORT_N_GRINDERS - 1));
2949
2950                 if (count == n_pkts) {
2951                         subport_id++;
2952
2953                         if (subport_id == port->n_subports_per_port)
2954                                 subport_id = 0;
2955
2956                         port->subport_id = subport_id;
2957                         break;
2958                 }
2959
2960                 if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
2961                         i = 0;
2962                         subport_id++;
2963                         n_subports++;
2964                 }
2965
2966                 if (subport_id == port->n_subports_per_port)
2967                         subport_id = 0;
2968
2969                 if (n_subports == port->n_subports_per_port) {
2970                         port->subport_id = subport_id;
2971                         break;
2972                 }
2973         }
2974
2975         return count;
2976 }
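
/*
 * Typical transmit-side pairing (sketch; rte_eth_tx_burst as the sink is
 * an assumption of the example):
 *
 *	struct rte_mbuf *out[32];
 *	uint32_t n = rte_sched_port_dequeue(port, out, 32);
 *
 *	if (n > 0)
 *		rte_eth_tx_burst(port_id, tx_queue_id, out, (uint16_t)n);
 *
 * The call round-robins across subports and grinders, so it should be
 * invoked continuously even when few packets are expected back.
 */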