sched: update queue stats read for config flexibility

lib/librte_sched/rte_sched.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_bitmap.h>
#include <rte_reciprocal.h>

#include "rte_sched.h"
#include "rte_sched_common.h"
#include "rte_approx.h"

#ifdef __INTEL_COMPILER
#pragma warning(disable:2259) /* conversion may lose significant bits */
#endif

#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>

#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
#elif defined(RTE_MACHINE_CPUFLAG_NEON)
#define SCHED_VECTOR_NEON
#endif

#endif

#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
#define RTE_SCHED_WRR_SHIFT                   3
#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

/* Scaling for cycles_per_byte calculation
 * Chosen so that minimum rate is 480 bit/sec
 */
#define RTE_SCHED_TIME_SHIFT                  8
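
/*
 * Illustrative arithmetic (not from the original source): with the fixed-point
 * scaling above, rte_sched_port_config() computes
 *     cycles_per_byte = (tsc_hz << RTE_SCHED_TIME_SHIFT) / rate.
 * Assuming a 2.5 GHz TSC and a 10 GbE port (rate = 1.25e9 bytes/sec), this
 * gives (2.5e9 * 256) / 1.25e9 = 512, i.e. 2 cycles/byte in Q8 fixed point.
 */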

struct rte_sched_pipe_profile {
        /* Token bucket (TB) */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;

        /* Pipe traffic classes */
        uint32_t tc_period;
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t tc_ov_weight;

        /* Pipe best-effort traffic class queues */
        uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_pipe {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_credits;

        /* Pipe profile and flags */
        uint32_t profile;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /* Weighted Round Robin (WRR) */
        uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

        /* TC oversubscription */
        uint32_t tc_ov_credits;
        uint8_t tc_ov_period_id;
} __rte_cache_aligned;

struct rte_sched_queue {
        uint16_t qw; /* Queue write pointer */
        uint16_t qr; /* Queue read pointer */
};

struct rte_sched_queue_extra {
        struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_RED
        struct rte_red red;
#endif
};

enum grinder_state {
        e_GRINDER_PREFETCH_PIPE = 0,
        e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
        e_GRINDER_PREFETCH_MBUF,
        e_GRINDER_READ_MBUF
};

struct rte_sched_grinder {
        /* Pipe cache */
        uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_w;
        uint32_t pcache_r;

        /* Current pipe */
        enum grinder_state state;
        uint32_t productive;
        uint32_t pindex;
        struct rte_sched_subport *subport;
        struct rte_sched_pipe *pipe;
        struct rte_sched_pipe_profile *pipe_params;

        /* TC cache */
        uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_w;
        uint32_t tccache_r;

        /* Current TC */
        uint32_t tc_index;
        struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
        struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint16_t qsize;
        uint32_t qmask;
        uint32_t qpos;
        struct rte_mbuf *pkt;

        /* WRR */
        uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;
        uint32_t tb_credits;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_period;

        /* TC oversubscription */
        uint32_t tc_ov_wm;
        uint32_t tc_ov_wm_min;
        uint32_t tc_ov_wm_max;
        uint8_t tc_ov_period_id;
        uint8_t tc_ov;
        uint32_t tc_ov_n;
        double tc_ov_rate;

        /* Statistics */
        struct rte_sched_subport_stats stats;

        /* Subport pipes */
        uint32_t n_pipes_per_subport_enabled;
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;

        /* Pipe best-effort TC rate */
        uint32_t pipe_tc_be_rate_max;

        /* Pipe queues size */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

struct rte_sched_port {
        /* User parameters */
        uint32_t n_subports_per_port;
        uint32_t n_pipes_per_subport;
        uint32_t n_pipes_per_subport_log2;
        uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
        uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t rate;
        uint32_t mtu;
        uint32_t frame_overhead;
        int socket;
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;
        uint32_t pipe_tc_be_rate_max;
#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Timing */
        uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
        uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
        uint64_t time;                /* Current NIC TX time measured in bytes */
        struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;
        struct rte_mbuf **pkts_out;
        uint32_t n_pkts_out;
        uint32_t subport_id;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        /* Legacy per-port data structure pointers, kept until all code paths
         * are converted to the per-subport layout
         */
        struct rte_sched_subport *subport;
        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;

        /* Large data structures: table of per-subport pointers, allocated
         * immediately after this structure; the zero-length array must remain
         * the last member so that subports[i] writes land in that region
         */
        struct rte_sched_subport *subports[0];
} __rte_cache_aligned;

enum rte_sched_port_array {
        e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
        e_RTE_SCHED_PORT_ARRAY_PIPE,
        e_RTE_SCHED_PORT_ARRAY_QUEUE,
        e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_PORT_ARRAY_TOTAL,
};

enum rte_sched_subport_array {
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};

static inline uint32_t
rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
{
        return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
}

static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
        /* The shift by 4 relies on RTE_SCHED_QUEUES_PER_PIPE being 16 */
        uint32_t pindex = qindex >> 4;
        uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

        return (subport->queue_array + pindex *
                subport->qsize_sum + subport->qsize_add[qpos]);
}

static inline uint16_t
rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
        struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return subport->qsize[tc];
}

static inline uint32_t
rte_sched_port_queues_per_port(struct rte_sched_port *port)
{
        uint32_t n_queues = 0, i;

        for (i = 0; i < port->n_subports_per_port; i++)
                n_queues += rte_sched_subport_pipe_queues(port->subports[i]);

        return n_queues;
}

static inline uint16_t
rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
{
        uint16_t pipe_queue = port->pipe_queue[traffic_class];

        return pipe_queue;
}

static inline uint8_t
rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return pipe_tc;
}

static inline uint8_t
rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return tc_queue;
}

static int
pipe_profile_check(struct rte_sched_pipe_params *params,
        uint32_t rate, uint16_t *qsize)
{
        uint32_t i;

        /* Pipe parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* TB rate: non-zero, not greater than port rate */
        if (params->tb_rate == 0 ||
                params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        /* TB size: non-zero */
        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* TC rate: non-zero if qsize non-zero, not greater than pipe TB rate */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
                        (qsize[i] != 0 && (params->tc_rate[i] == 0 ||
                        params->tc_rate[i] > params->tb_rate))) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize or tc_rate\n", __func__);
                        return -EINVAL;
                }
        }

        /* Best-effort TC: non-zero rate and queue size */
        if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for best-effort traffic class rate\n", __func__);
                return -EINVAL;
        }

        /* TC period: non-zero */
        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* Best-effort TC oversubscription weight: non-zero */
        if (params->tc_ov_weight == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc ov weight\n", __func__);
                return -EINVAL;
        }

        /* Queue WRR weights: non-zero */
        for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
                if (params->wrr_weights[i] == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for wrr weight\n", __func__);
                        return -EINVAL;
                }
        }

        return 0;
}

static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* socket */
        if (params->socket < 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for socket id\n", __func__);
                return -EINVAL;
        }

        /* rate */
        if (params->rate == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for rate\n", __func__);
                return -EINVAL;
        }

        /* mtu */
        if (params->mtu == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for mtu\n", __func__);
                return -EINVAL;
        }

        /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
        if (params->n_subports_per_port == 0 ||
            params->n_subports_per_port > 1u << 16 ||
            !rte_is_power_of_2(params->n_subports_per_port)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for number of subports\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport == 0 ||
            !rte_is_power_of_2(params->n_pipes_per_subport)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for maximum pipes number\n", __func__);
                return -EINVAL;
        }

        return 0;
}

static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
        enum rte_sched_subport_array array)
{
        uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
        uint32_t n_subport_pipe_queues =
                RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

        uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
        uint32_t size_queue =
                n_subport_pipe_queues * sizeof(struct rte_sched_queue);
        uint32_t size_queue_extra =
                n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
        uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
                sizeof(struct rte_sched_pipe_profile);
        uint32_t size_bmp_array =
                rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        uint32_t size_per_pipe_queue_array, size_queue_array;

        uint32_t base, i;

        size_per_pipe_queue_array = 0;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
                        size_per_pipe_queue_array +=
                                params->qsize[i] * sizeof(struct rte_mbuf *);
                else
                        size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
                                params->qsize[i] * sizeof(struct rte_mbuf *);
        }
        size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

        base = 0;

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

        return base;
}

static void
rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
{
        uint32_t i;

        subport->qsize_add[0] = 0;

        /* Strict priority traffic classes */
        for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];

        /* Best-effort traffic class */
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];

        subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
}
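
/*
 * Illustrative layout (not from the original source): for a pipe with
 * qsize[] = {64, ..., 64, 128}, i.e. 12 strict-priority classes of 64 entries
 * and a best-effort class of 128 entries, qsize_add[] holds the running
 * offsets {0, 64, ..., 704, 768, 896, 1024, 1152} covering the 12 SP queues
 * plus the 4 BE queues, and qsize_sum = 1280 entries per pipe.
 */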

static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
        struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

        RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u,\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best-effort traffic class oversubscription: weight = %hhu\n"
                "       WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
                i,

                /* Token bucket */
                p->tb_period,
                p->tb_credits_per_period,
                p->tb_size,

                /* Traffic classes */
                p->tc_period,
                p->tc_credits_per_period[0],
                p->tc_credits_per_period[1],
                p->tc_credits_per_period[2],
                p->tc_credits_per_period[3],
                p->tc_credits_per_period[4],
                p->tc_credits_per_period[5],
                p->tc_credits_per_period[6],
                p->tc_credits_per_period[7],
                p->tc_credits_per_period[8],
                p->tc_credits_per_period[9],
                p->tc_credits_per_period[10],
                p->tc_credits_per_period[11],
                p->tc_credits_per_period[12],

                /* Best-effort traffic class oversubscription */
                p->tc_ov_weight,

                /* WRR */
                p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}

static inline uint64_t
rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
{
        uint64_t time = time_ms;

        time = (time * rate) / 1000;

        return time;
}
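
/*
 * Illustrative arithmetic (not from the original source): at rate =
 * 1.25e9 bytes/sec (10 GbE), a tc_period of 10 ms converts to
 * (10 * 1.25e9) / 1000 = 12,500,000 bytes of credit window.
 */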

static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
        struct rte_sched_pipe_params *src,
        struct rte_sched_pipe_profile *dst,
        uint32_t rate)
{
        uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint32_t lcd1, lcd2, lcd;
        uint32_t i;

        /* Token Bucket */
        if (src->tb_rate == rate) {
                dst->tb_credits_per_period = 1;
                dst->tb_period = 1;
        } else {
                double tb_rate = (double) src->tb_rate
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d,
                        &dst->tb_credits_per_period, &dst->tb_period);
        }

        dst->tb_size = src->tb_size;

        /* Traffic Classes */
        dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (subport->qsize[i])
                        dst->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(src->tc_period,
                                        src->tc_rate[i]);

        dst->tc_ov_weight = src->tc_ov_weight;

        /* WRR queues */
        wrr_cost[0] = src->wrr_weights[0];
        wrr_cost[1] = src->wrr_weights[1];
        wrr_cost[2] = src->wrr_weights[2];
        wrr_cost[3] = src->wrr_weights[3];

        lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
        lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
        lcd = rte_get_lcd(lcd1, lcd2);

        wrr_cost[0] = lcd / wrr_cost[0];
        wrr_cost[1] = lcd / wrr_cost[1];
        wrr_cost[2] = lcd / wrr_cost[2];
        wrr_cost[3] = lcd / wrr_cost[3];

        dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
        dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
        dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
        dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}
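
/*
 * Illustrative example (not from the original source): WRR weights
 * {1, 2, 4, 8} have lcd = 8, so wrr_cost becomes {8, 4, 2, 1}. A queue
 * with twice the weight pays half the cost per byte served and therefore
 * receives twice the best-effort bandwidth share.
 */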

static void
rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
        struct rte_sched_subport_params *params, uint32_t rate)
{
        uint32_t i;

        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;

                rte_sched_pipe_profile_convert(subport, src, dst, rate);
                rte_sched_port_log_pipe_profile(subport, i);
        }

        subport->pipe_tc_be_rate_max = 0;
        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

                if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
                        subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
        }
}

static int
rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        uint32_t n_max_pipes_per_subport,
        uint32_t rate)
{
        uint32_t i;

        /* Check user parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        if (params->tb_rate == 0 || params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* qsize: if non-zero, power of 2,
         * no bigger than 32K (due to 16-bit read/write pointers)
         */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint16_t qsize = params->qsize[i];

                if (qsize != 0 && !rte_is_power_of_2(qsize)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize\n", __func__);
                        return -EINVAL;
                }
        }

        /* tc_rate: non-zero if and only if qsize is non-zero,
         * not greater than the subport TB rate
         */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t tc_rate = params->tc_rate[i];
                uint16_t qsize = params->qsize[i];

                if ((qsize == 0 && tc_rate != 0) ||
                        (qsize != 0 && tc_rate == 0) ||
                        (tc_rate > params->tb_rate)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for tc rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect qsize or tc rate (best effort)\n", __func__);
                return -EINVAL;
        }

        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport_enabled: non-zero, power of 2,
         * no greater than the port-level maximum
         */
        if (params->n_pipes_per_subport_enabled == 0 ||
                params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
                !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipes number\n", __func__);
                return -EINVAL;
        }

        /* pipe_profiles and n_pipe_profiles */
        if (params->pipe_profiles == NULL ||
                params->n_pipe_profiles == 0 ||
                params->n_max_pipe_profiles == 0 ||
                params->n_pipe_profiles > params->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipe profiles\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < params->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *p = params->pipe_profiles + i;
                int status;

                status = pipe_profile_check(p, rate, &params->qsize[0]);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile check failed (%d)\n", __func__, status);
                        return -EINVAL;
                }
        }

        return 0;
}

uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
        struct rte_sched_subport_params **subport_params)
{
        uint32_t size0 = 0, size1 = 0, i;
        int status;

        status = rte_sched_port_check_params(port_params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler port params check failed (%d)\n",
                        __func__, status);

                return 0;
        }

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                status = rte_sched_subport_check_params(sp,
                                port_params->n_pipes_per_subport,
                                port_params->rate);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Port scheduler subport params check failed (%d)\n",
                                __func__, status);

                        return 0;
                }
        }

        size0 = sizeof(struct rte_sched_port);

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                size1 += rte_sched_subport_get_array_base(sp,
                                        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
        }

        return size0 + size1;
}
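
/*
 * Usage sketch (illustrative, assuming params are already populated):
 *
 *     uint32_t fp = rte_sched_port_get_memory_footprint(&port_params,
 *             subport_params);
 *     if (fp == 0)
 *             rte_panic("Invalid scheduler configuration\n");
 *
 * A zero return indicates that parameter checking failed.
 */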

struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
        struct rte_sched_port *port = NULL;
        uint32_t size0, size1;
        uint32_t cycles_per_byte;
        uint32_t i, j;
        int status;

        status = rte_sched_port_check_params(params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);
                return NULL;
        }

        size0 = sizeof(struct rte_sched_port);
        size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);

        /* Allocate memory to store the data structures */
        port = rte_zmalloc_socket("qos_params", size0 + size1, RTE_CACHE_LINE_SIZE,
                params->socket);
        if (port == NULL) {
                RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

                return NULL;
        }

        /* User parameters */
        port->n_subports_per_port = params->n_subports_per_port;
        port->n_pipes_per_subport = params->n_pipes_per_subport;
        port->n_pipes_per_subport_log2 =
                        __builtin_ctz(params->n_pipes_per_subport);
        port->socket = params->socket;

        /* Traffic class -> first pipe queue of that class */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                port->pipe_queue[i] = i;

        /* Pipe queue -> traffic class (the queues below the best-effort
         * class are strict priority; the remaining queues all belong to
         * the best-effort class)
         */
        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->pipe_tc[i] = j;

                if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }

        /* Pipe queue -> queue offset within its traffic class */
        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->tc_queue[i] = j;

                if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }
        port->rate = params->rate;
        port->mtu = params->mtu + params->frame_overhead;
        port->frame_overhead = params->frame_overhead;

        /* Timing */
        port->time_cpu_cycles = rte_get_tsc_cycles();
        port->time_cpu_bytes = 0;
        port->time = 0;

        cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
                / params->rate;
        port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);

        /* Grinders */
        port->pkts_out = NULL;
        port->n_pkts_out = 0;
        port->subport_id = 0;

        return port;
}

static inline void
rte_sched_subport_free(struct rte_sched_port *port,
        struct rte_sched_subport *subport)
{
        uint32_t n_subport_pipe_queues;
        uint32_t qindex;

        if (subport == NULL)
                return;

        n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);

        /* Free enqueued mbufs */
        for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
                struct rte_mbuf **mbufs =
                        rte_sched_subport_pipe_qbase(subport, qindex);
                uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
                if (qsize != 0) {
                        struct rte_sched_queue *queue = subport->queue + qindex;
                        uint16_t qr = queue->qr & (qsize - 1);
                        uint16_t qw = queue->qw & (qsize - 1);

                        for (; qr != qw; qr = (qr + 1) & (qsize - 1))
                                rte_pktmbuf_free(mbufs[qr]);
                }
        }

        rte_bitmap_free(subport->bmp);

        /* The subport was allocated with rte_zmalloc_socket(); release it
         * here so that freeing the port does not leak its subports
         */
        rte_free(subport);
}

void
rte_sched_port_free(struct rte_sched_port *port)
{
        uint32_t i;

        /* Check user parameters */
        if (port == NULL)
                return;

        for (i = 0; i < port->n_subports_per_port; i++)
                rte_sched_subport_free(port, port->subports[i]);

        rte_free(port);
}

static void
rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
{
        struct rte_sched_subport *s = port->subports[i];

        RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best effort traffic class oversubscription: wm min = %u, wm max = %u\n",
                i,

                /* Token bucket */
                s->tb_period,
                s->tb_credits_per_period,
                s->tb_size,

                /* Traffic classes */
                s->tc_period,
                s->tc_credits_per_period[0],
                s->tc_credits_per_period[1],
                s->tc_credits_per_period[2],
                s->tc_credits_per_period[3],
                s->tc_credits_per_period[4],
                s->tc_credits_per_period[5],
                s->tc_credits_per_period[6],
                s->tc_credits_per_period[7],
                s->tc_credits_per_period[8],
                s->tc_credits_per_period[9],
                s->tc_credits_per_period[10],
                s->tc_credits_per_period[11],
                s->tc_credits_per_period[12],

                /* Best effort traffic class oversubscription */
                s->tc_ov_wm_min,
                s->tc_ov_wm_max);
}

static void
rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
{
        uint32_t i;

        for (i = 0; i < n_subports; i++) {
                struct rte_sched_subport *subport = port->subports[i];

                rte_sched_subport_free(port, subport);
        }

        rte_free(port);
}

int
rte_sched_subport_config(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_params *params)
{
        struct rte_sched_subport *s = NULL;
        uint32_t n_subports = subport_id;
        uint32_t n_subport_pipe_queues, i;
        uint32_t size0, size1, bmp_mem_size;
        int status;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        status = rte_sched_subport_check_params(params,
                port->n_pipes_per_subport,
                port->rate);
        if (status != 0) {
                RTE_LOG(NOTICE, SCHED,
                        "%s: Subport scheduler params check failed (%d)\n",
                        __func__, status);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Determine the amount of memory to allocate */
        size0 = sizeof(struct rte_sched_subport);
        size1 = rte_sched_subport_get_array_base(params,
                                e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);

        /* Allocate memory to store the data structures */
        s = rte_zmalloc_socket("subport_params", size0 + size1,
                RTE_CACHE_LINE_SIZE, port->socket);
        if (s == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Memory allocation fails\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -ENOMEM;
        }

        n_subports++;

        /* Port */
        port->subports[subport_id] = s;

        /* Token Bucket (TB) */
        if (params->tb_rate == port->rate) {
                s->tb_credits_per_period = 1;
                s->tb_period = 1;
        } else {
                double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
        }

        s->tb_size = params->tb_size;
        s->tb_time = port->time;
        s->tb_credits = s->tb_size / 2;

        /* Traffic Classes (TCs) */
        s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (params->qsize[i])
                        s->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(params->tc_period,
                                        params->tc_rate[i]);
        }
        s->tc_time = port->time + s->tc_period;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (params->qsize[i])
                        s->tc_credits[i] = s->tc_credits_per_period[i];

        /* compile time checks */
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
                (RTE_SCHED_PORT_N_GRINDERS - 1));

        /* User parameters */
        s->n_pipes_per_subport_enabled = params->n_pipes_per_subport_enabled;
        memcpy(s->qsize, params->qsize, sizeof(params->qsize));
        s->n_pipe_profiles = params->n_pipe_profiles;
        s->n_max_pipe_profiles = params->n_max_pipe_profiles;

#ifdef RTE_SCHED_RED
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t j;

                for (j = 0; j < RTE_COLORS; j++) {
                        /* if min/max are both zero, then RED is disabled */
                        if ((params->red_params[i][j].min_th |
                             params->red_params[i][j].max_th) == 0) {
                                continue;
                        }

                        if (rte_red_config_init(&s->red_config[i][j],
                                params->red_params[i][j].wq_log2,
                                params->red_params[i][j].min_th,
                                params->red_params[i][j].max_th,
                                params->red_params[i][j].maxp_inv) != 0) {
                                rte_sched_free_memory(port, n_subports);

                                RTE_LOG(NOTICE, SCHED,
                                "%s: RED configuration init fails\n", __func__);
                                return -EINVAL;
                        }
                }
        }
#endif

        /* Scheduling loop detection */
        s->pipe_loop = RTE_SCHED_PIPE_INVALID;
        s->pipe_exhaustion = 0;

        /* Grinders */
        s->busy_grinders = 0;

        /* Queue base calculation */
        rte_sched_subport_config_qsize(s);

        /* Large data structures */
        s->pipe = (struct rte_sched_pipe *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
        s->queue = (struct rte_sched_queue *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
        s->queue_extra = (struct rte_sched_queue_extra *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
        s->pipe_profiles = (struct rte_sched_pipe_profile *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
        s->bmp_array = s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
        s->queue_array = (struct rte_mbuf **)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));

        /* Pipe profile table */
        rte_sched_subport_config_pipe_profile_table(s, params, port->rate);

        /* Bitmap */
        n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
        bmp_mem_size = rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
                                bmp_mem_size);
        if (s->bmp == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Subport bitmap init error\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
                s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;

#ifdef RTE_SCHED_SUBPORT_TC_OV
        /* TC oversubscription */
        s->tc_ov_wm_min = port->mtu;
        s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period,
                                                     s->pipe_tc_be_rate_max);
        s->tc_ov_wm = s->tc_ov_wm_max;
        s->tc_ov_period_id = 0;
        s->tc_ov = 0;
        s->tc_ov_n = 0;
        s->tc_ov_rate = 0;
#endif

        rte_sched_port_log_subport_config(port, subport_id);

        return 0;
}

int
rte_sched_pipe_config(struct rte_sched_port *port,
        uint32_t subport_id,
        uint32_t pipe_id,
        int32_t pipe_profile)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe *p;
        struct rte_sched_pipe_profile *params;
        uint32_t n_subports = subport_id + 1;
        uint32_t deactivate, profile, i;

        /* Check user parameters */
        profile = (uint32_t) pipe_profile;
        deactivate = (pipe_profile < 0);

        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        s = port->subports[subport_id];
        if (pipe_id >= s->n_pipes_per_subport_enabled) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        if (!deactivate && profile >= s->n_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe profile\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Handle the case when pipe already has a valid configuration */
        p = s->pipe + pipe_id;
        if (p->tb_time) {
                params = s->pipe_profiles + p->profile;

                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                /* Unplug pipe from its subport */
                s->tc_ov_n -= params->tc_ov_weight;
                s->tc_ov_rate -= pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }

                /* Reset the pipe */
                memset(p, 0, sizeof(struct rte_sched_pipe));
        }

        if (deactivate)
                return 0;

        /* Apply the new pipe configuration */
        p->profile = profile;
        params = s->pipe_profiles + p->profile;

        /* Token Bucket (TB) */
        p->tb_time = port->time;
        p->tb_credits = params->tb_size / 2;

        /* Traffic Classes (TCs) */
        p->tc_time = port->time + params->tc_period;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (s->qsize[i])
                        p->tc_credits[i] = params->tc_credits_per_period[i];

        {
                /* Subport best effort tc oversubscription */
                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                s->tc_ov_n += params->tc_ov_weight;
                s->tc_ov_rate += pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }
                p->tc_ov_period_id = s->tc_ov_period_id;
                p->tc_ov_credits = s->tc_ov_wm;
        }

        return 0;
}
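
/*
 * Usage sketch (illustrative): a typical initialization sequence is
 *
 *     struct rte_sched_port *port = rte_sched_port_config(&port_params);
 *     for (i = 0; i < n_subports; i++) {
 *             rte_sched_subport_config(port, i, subport_params[i]);
 *             for (j = 0; j < n_pipes; j++)
 *                     rte_sched_pipe_config(port, i, j, 0);
 *     }
 *
 * where profile 0 must have been supplied via the subport's pipe_profiles
 * table, and error checking is omitted for brevity.
 */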

int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_pipe_params *params,
        uint32_t *pipe_profile_id)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe_profile *pp;
        uint32_t i;
        int status;

        /* Port */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        /* Subport id must not exceed the max limit */
        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Pipe profiles must not exceed the max limit */
        if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Number of pipe profiles exceeds the max limit\n", __func__);
                return -EINVAL;
        }

        /* Pipe params */
        status = pipe_profile_check(params, port->rate, &s->qsize[0]);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Pipe profile check failed (%d)\n", __func__, status);
                return -EINVAL;
        }

        pp = &s->pipe_profiles[s->n_pipe_profiles];
        rte_sched_pipe_profile_convert(s, params, pp, port->rate);

        /* Pipe profile must not already exist */
        for (i = 0; i < s->n_pipe_profiles; i++)
                if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile exists\n", __func__);
                        return -EINVAL;
                }

        /* Pipe profile commit */
        *pipe_profile_id = s->n_pipe_profiles;
        s->n_pipe_profiles++;

        if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
                s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

        rte_sched_port_log_pipe_profile(s, *pipe_profile_id);

        return 0;
}

static inline uint32_t
rte_sched_port_qindex(struct rte_sched_port *port,
        uint32_t subport,
        uint32_t pipe,
        uint32_t traffic_class,
        uint32_t queue)
{
        return ((subport & (port->n_subports_per_port - 1)) <<
                (port->n_pipes_per_subport_log2 + 4)) |
                ((pipe &
                (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
                ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
                (RTE_SCHED_QUEUES_PER_PIPE - 1));
}
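
/*
 * Queue id bit layout (derived from the shifts above), low to high:
 * bits [0:3] select the queue within the pipe (16 queues per pipe),
 * the next n_pipes_per_subport_log2 bits select the pipe, and the
 * remaining high bits select the subport.
 */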

void
rte_sched_port_pkt_write(struct rte_sched_port *port,
                         struct rte_mbuf *pkt,
                         uint32_t subport, uint32_t pipe,
                         uint32_t traffic_class,
                         uint32_t queue, enum rte_color color)
{
        uint32_t queue_id =
                rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);

        rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
}
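
/*
 * Usage sketch (illustrative): the classification stage stamps each mbuf
 * before handing it to the scheduler, e.g.
 *
 *     rte_sched_port_pkt_write(port, pkt, subport, pipe, tc, queue,
 *             RTE_COLOR_GREEN);
 *
 * and the enqueue path later reads the same fields back from the mbuf
 * sched area.
 */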

void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
                                  const struct rte_mbuf *pkt,
                                  uint32_t *subport, uint32_t *pipe,
                                  uint32_t *traffic_class, uint32_t *queue)
{
        uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);

        *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
        *pipe = (queue_id >> 4) &
                (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
        *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
        *queue = rte_sched_port_tc_queue(port, queue_id);
}

enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
{
        return (enum rte_color)rte_mbuf_sched_color_get(pkt);
}

int
rte_sched_subport_read_stats(struct rte_sched_port *port,
                             uint32_t subport_id,
                             struct rte_sched_subport_stats *stats,
                             uint32_t *tc_ov)
{
        struct rte_sched_subport *s;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        if (stats == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter stats\n", __func__);
                return -EINVAL;
        }

        if (tc_ov == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc_ov\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Copy subport stats and clear */
        memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
        memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));

        /* Subport TC oversubscription status */
        *tc_ov = s->tc_ov;

        return 0;
}
1465
1466 int
1467 rte_sched_queue_read_stats(struct rte_sched_port *port,
1468         uint32_t queue_id,
1469         struct rte_sched_queue_stats *stats,
1470         uint16_t *qlen)
1471 {
1472         struct rte_sched_subport *s;
1473         struct rte_sched_queue *q;
1474         struct rte_sched_queue_extra *qe;
1475         uint32_t subport_id, subport_qmask, subport_qindex;
1476
1477         /* Check user parameters */
1478         if (port == NULL) {
1479                 RTE_LOG(ERR, SCHED,
1480                         "%s: Incorrect value for parameter port\n", __func__);
1481                 return -EINVAL;
1482         }
1483
1484         if (queue_id >= rte_sched_port_queues_per_port(port)) {
1485                 RTE_LOG(ERR, SCHED,
1486                         "%s: Incorrect value for queue id\n", __func__);
1487                 return -EINVAL;
1488         }
1489
1490         if (stats == NULL) {
1491                 RTE_LOG(ERR, SCHED,
1492                         "%s: Incorrect value for parameter stats\n", __func__);
1493                 return -EINVAL;
1494         }
1495
1496         if (qlen == NULL) {
1497                 RTE_LOG(ERR, SCHED,
1498                         "%s: Incorrect value for parameter qlen\n", __func__);
1499                 return -EINVAL;
1500         }
1501         subport_qmask = port->n_pipes_per_subport_log2 + 4; /* shift amount, not a bit mask */
1502         subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1);
1503
1504         s = port->subports[subport_id];
1505         subport_qindex = ((1 << subport_qmask) - 1) & queue_id;
1506         q = s->queue + subport_qindex;
1507         qe = s->queue_extra + subport_qindex;
1508
1509         /* Copy queue stats and clear */
1510         memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1511         memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1512
1513         /* Queue length */
1514         *qlen = q->qw - q->qr;
1515
1516         return 0;
1517 }
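
/*
 * Usage sketch (illustrative): both read_stats calls clear the counters
 * they copy out, so polling at a fixed interval yields per-interval
 * rates directly:
 *
 *	struct rte_sched_queue_stats qs;
 *	uint16_t qlen;
 *
 *	if (rte_sched_queue_read_stats(port, queue_id, &qs, &qlen) == 0)
 *		printf("queue %u: %u pkts in, %u dropped, qlen %u\n",
 *			queue_id, qs.n_pkts, qs.n_pkts_dropped, qlen);
 */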
1518
1519 #ifdef RTE_SCHED_DEBUG
1520
1521 static inline int
1522 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1523         uint32_t qindex)
1524 {
1525         struct rte_sched_queue *queue = subport->queue + qindex;
1526
1527         return queue->qr == queue->qw;
1528 }
1529
1530 #endif /* RTE_SCHED_DEBUG */
1531
1532 #ifdef RTE_SCHED_COLLECT_STATS
1533
1534 static inline void
1535 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1536         struct rte_sched_subport *subport,
1537         uint32_t qindex,
1538         struct rte_mbuf *pkt)
1539 {
1540         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1541         uint32_t pkt_len = pkt->pkt_len;
1542
1543         subport->stats.n_pkts_tc[tc_index] += 1;
1544         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1545 }
1546
1547 #ifdef RTE_SCHED_RED
1548 static inline void
1549 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1550         struct rte_sched_subport *subport,
1551         uint32_t qindex,
1552         struct rte_mbuf *pkt,
1553         uint32_t red)
1554 #else
1555 static inline void
1556 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1557         struct rte_sched_subport *subport,
1558         uint32_t qindex,
1559         struct rte_mbuf *pkt,
1560         __rte_unused uint32_t red)
1561 #endif
1562 {
1563         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1564         uint32_t pkt_len = pkt->pkt_len;
1565
1566         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1567         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1568 #ifdef RTE_SCHED_RED
1569         subport->stats.n_pkts_red_dropped[tc_index] += red;
1570 #endif
1571 }
1572
1573 static inline void
1574 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1575         uint32_t qindex,
1576         struct rte_mbuf *pkt)
1577 {
1578         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1579         uint32_t pkt_len = pkt->pkt_len;
1580
1581         qe->stats.n_pkts += 1;
1582         qe->stats.n_bytes += pkt_len;
1583 }
1584
1585 #ifdef RTE_SCHED_RED
1586 static inline void
1587 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1588         uint32_t qindex,
1589         struct rte_mbuf *pkt,
1590         uint32_t red)
1591 #else
1592 static inline void
1593 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1594         uint32_t qindex,
1595         struct rte_mbuf *pkt,
1596         __rte_unused uint32_t red)
1597 #endif
1598 {
1599         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1600         uint32_t pkt_len = pkt->pkt_len;
1601
1602         qe->stats.n_pkts_dropped += 1;
1603         qe->stats.n_bytes_dropped += pkt_len;
1604 #ifdef RTE_SCHED_RED
1605         qe->stats.n_pkts_red_dropped += red;
1606 #endif
1607 }
1608
1609 #endif /* RTE_SCHED_COLLECT_STATS */
1610
1611 #ifdef RTE_SCHED_RED
1612
1613 static inline int
1614 rte_sched_port_red_drop(struct rte_sched_port *port,
1615         struct rte_sched_subport *subport,
1616         struct rte_mbuf *pkt,
1617         uint32_t qindex,
1618         uint16_t qlen)
1619 {
1620         struct rte_sched_queue_extra *qe;
1621         struct rte_red_config *red_cfg;
1622         struct rte_red *red;
1623         uint32_t tc_index;
1624         enum rte_color color;
1625
1626         tc_index = rte_sched_port_pipe_tc(port, qindex);
1627         color = rte_sched_port_pkt_read_color(pkt);
1628         red_cfg = &subport->red_config[tc_index][color];
1629
1630         if ((red_cfg->min_th | red_cfg->max_th) == 0)
1631                 return 0;
1632
1633         qe = subport->queue_extra + qindex;
1634         red = &qe->red;
1635
1636         return rte_red_enqueue(red_cfg, red, qlen, port->time);
1637 }
1638
1639 static inline void
1640 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
1641         struct rte_sched_subport *subport, uint32_t qindex)
1642 {
1643         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1644         struct rte_red *red = &qe->red;
1645
1646         rte_red_mark_queue_empty(red, port->time);
1647 }
1648
1649 #else
1650
1651 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
1652         struct rte_sched_subport *subport __rte_unused,
1653         struct rte_mbuf *pkt __rte_unused,
1654         uint32_t qindex __rte_unused,
1655         uint16_t qlen __rte_unused)
1656 {
1657         return 0;
1658 }
1659
1660 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
1661
1662 #endif /* RTE_SCHED_RED */
1663
1664 #ifdef RTE_SCHED_DEBUG
1665
1666 static inline void
1667 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1668                        uint64_t bmp_slab)
1669 {
1670         uint64_t mask;
1671         uint32_t i, panic;
1672
1673         if (bmp_slab == 0)
1674                 rte_panic("Empty slab at position %u\n", bmp_pos);
1675
1676         panic = 0;
1677         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1678                 if (mask & bmp_slab) {
1679                         if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1680                                 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1681                                 panic = 1;
1682                         }
1683                 }
1684         }
1685
1686         if (panic)
1687                 rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1688                         bmp_slab, bmp_pos);
1689 }
1690
1691 #endif /* RTE_SCHED_DEBUG */
1692
1693 static inline struct rte_sched_subport *
1694 rte_sched_port_subport(struct rte_sched_port *port,
1695         struct rte_mbuf *pkt)
1696 {
1697         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1698         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1699
1700         return port->subports[subport_id];
1701 }
1702
1703 static inline uint32_t
1704 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1705         struct rte_mbuf *pkt, uint32_t subport_qmask)
1706 {
1707         struct rte_sched_queue *q;
1708 #ifdef RTE_SCHED_COLLECT_STATS
1709         struct rte_sched_queue_extra *qe;
1710 #endif
1711         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1712         uint32_t subport_queue_id = subport_qmask & qindex;
1713
1714         q = subport->queue + subport_queue_id;
1715         rte_prefetch0(q);
1716 #ifdef RTE_SCHED_COLLECT_STATS
1717         qe = subport->queue_extra + subport_queue_id;
1718         rte_prefetch0(qe);
1719 #endif
1720
1721         return subport_queue_id;
1722 }
1723
1724 static inline void
1725 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
1726         struct rte_sched_subport *subport,
1727         uint32_t qindex,
1728         struct rte_mbuf **qbase)
1729 {
1730         struct rte_sched_queue *q;
1731         struct rte_mbuf **q_qw;
1732         uint16_t qsize;
1733
1734         q = subport->queue + qindex;
1735         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1736         q_qw = qbase + (q->qw & (qsize - 1));
1737
1738         rte_prefetch0(q_qw);
1739         rte_bitmap_prefetch0(subport->bmp, qindex);
1740 }
1741
1742 static inline int
1743 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
1744         struct rte_sched_subport *subport,
1745         uint32_t qindex,
1746         struct rte_mbuf **qbase,
1747         struct rte_mbuf *pkt)
1748 {
1749         struct rte_sched_queue *q;
1750         uint16_t qsize;
1751         uint16_t qlen;
1752
1753         q = subport->queue + qindex;
1754         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1755         qlen = q->qw - q->qr;
1756
1757         /* Drop the packet (and update drop stats) when queue is full or RED drops it */
1758         if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
1759                      (qlen >= qsize))) {
1760                 rte_pktmbuf_free(pkt);
1761 #ifdef RTE_SCHED_COLLECT_STATS
1762                 rte_sched_port_update_subport_stats_on_drop(port, subport,
1763                         qindex, pkt, qlen < qsize);
1764                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
1765                         qlen < qsize);
1766 #endif
1767                 return 0;
1768         }
1769
1770         /* Enqueue packet */
1771         qbase[q->qw & (qsize - 1)] = pkt;
1772         q->qw++;
1773
1774         /* Activate queue in the subport bitmap */
1775         rte_bitmap_set(subport->bmp, qindex);
1776
1777         /* Statistics */
1778 #ifdef RTE_SCHED_COLLECT_STATS
1779         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
1780         rte_sched_port_update_queue_stats(subport, qindex, pkt);
1781 #endif
1782
1783         return 1;
1784 }
1785
1786
1787 /*
1788  * The enqueue function implements a 4-level pipeline with each stage
1789  * processing two different packets. The purpose of using a pipeline
1790  * is to hide the latency of prefetching the data structures. The
1791  * naming convention is presented in the diagram below:
1792  *
1793  *   p00  _______   p10  _______   p20  _______   p30  _______
1794  * ----->|       |----->|       |----->|       |----->|       |----->
1795  *       |   0   |      |   1   |      |   2   |      |   3   |
1796  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
1797  *   p01            p11            p21            p31
1798  *
1799  */
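/*
 * Illustrative trace (assumed burst of n_pkts = 8): pkts[0..5] fill
 * stages 2, 1 and 0 before the loop; the single steady-state iteration
 * (i = 6) writes pkts[0..1] to their queues, and the drain sequence
 * then writes pkts[2..3], pkts[4..5] and pkts[6..7]. An odd trailing
 * packet is carried through the *_last variables instead.
 */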
1800 int
1801 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
1802                        uint32_t n_pkts)
1803 {
1804         struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
1805                 *pkt30, *pkt31, *pkt_last;
1806         struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
1807                 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
1808         struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
1809                 *subport20, *subport21, *subport30, *subport31, *subport_last;
1810         uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
1811         uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
1812         uint32_t subport_qmask;
1813         uint32_t result, i;
1814
1815         result = 0;
1816         subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
1817
1818         /*
1819          * Fewer than 6 input packets available, which is not enough to
1820          * feed the pipeline
1821          */
1822         if (unlikely(n_pkts < 6)) {
1823                 struct rte_sched_subport *subports[5];
1824                 struct rte_mbuf **q_base[5];
1825                 uint32_t q[5];
1826
1827                 /* Prefetch the mbuf structure of each packet */
1828                 for (i = 0; i < n_pkts; i++)
1829                         rte_prefetch0(pkts[i]);
1830
1831                 /* Prefetch the subport structure for each packet */
1832                 for (i = 0; i < n_pkts; i++)
1833                         subports[i] = rte_sched_port_subport(port, pkts[i]);
1834
1835                 /* Prefetch the queue structure of each packet's queue */
1836                 for (i = 0; i < n_pkts; i++)
1837                         q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
1838                                         pkts[i], subport_qmask);
1839
1840                 /* Prefetch the write pointer location of each queue */
1841                 for (i = 0; i < n_pkts; i++) {
1842                         q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
1843                         rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
1844                                 q[i], q_base[i]);
1845                 }
1846
1847                 /* Write each packet to its queue */
1848                 for (i = 0; i < n_pkts; i++)
1849                         result += rte_sched_port_enqueue_qwa(port, subports[i],
1850                                                 q[i], q_base[i], pkts[i]);
1851
1852                 return result;
1853         }
1854
1855         /* Feed the first 3 stages of the pipeline (6 packets needed) */
1856         pkt20 = pkts[0];
1857         pkt21 = pkts[1];
1858         rte_prefetch0(pkt20);
1859         rte_prefetch0(pkt21);
1860
1861         pkt10 = pkts[2];
1862         pkt11 = pkts[3];
1863         rte_prefetch0(pkt10);
1864         rte_prefetch0(pkt11);
1865
1866         subport20 = rte_sched_port_subport(port, pkt20);
1867         subport21 = rte_sched_port_subport(port, pkt21);
1868         q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
1869                         pkt20, subport_qmask);
1870         q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
1871                         pkt21, subport_qmask);
1872
1873         pkt00 = pkts[4];
1874         pkt01 = pkts[5];
1875         rte_prefetch0(pkt00);
1876         rte_prefetch0(pkt01);
1877
1878         subport10 = rte_sched_port_subport(port, pkt10);
1879         subport11 = rte_sched_port_subport(port, pkt11);
1880         q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1881                         pkt10, subport_qmask);
1882         q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1883                         pkt11, subport_qmask);
1884
1885         q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1886         q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1887         rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1888         rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1889
1890         /* Run the pipeline */
1891         for (i = 6; i < (n_pkts & (~1)); i += 2) {
1892                 /* Propagate stage inputs */
1893                 pkt30 = pkt20;
1894                 pkt31 = pkt21;
1895                 pkt20 = pkt10;
1896                 pkt21 = pkt11;
1897                 pkt10 = pkt00;
1898                 pkt11 = pkt01;
1899                 q30 = q20;
1900                 q31 = q21;
1901                 q20 = q10;
1902                 q21 = q11;
1903                 subport30 = subport20;
1904                 subport31 = subport21;
1905                 subport20 = subport10;
1906                 subport21 = subport11;
1907                 q30_base = q20_base;
1908                 q31_base = q21_base;
1909
1910                 /* Stage 0: Get packets in */
1911                 pkt00 = pkts[i];
1912                 pkt01 = pkts[i + 1];
1913                 rte_prefetch0(pkt00);
1914                 rte_prefetch0(pkt01);
1915
1916                 /* Stage 1: Prefetch the subport and the queue structure storing the queue pointers */
1917                 subport10 = rte_sched_port_subport(port, pkt10);
1918                 subport11 = rte_sched_port_subport(port, pkt11);
1919                 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1920                                 pkt10, subport_qmask);
1921                 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1922                                 pkt11, subport_qmask);
1923
1924                 /* Stage 2: Prefetch queue write location */
1925                 q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1926                 q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1927                 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1928                 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1929
1930                 /* Stage 3: Write packet to queue and activate queue */
1931                 r30 = rte_sched_port_enqueue_qwa(port, subport30,
1932                                 q30, q30_base, pkt30);
1933                 r31 = rte_sched_port_enqueue_qwa(port, subport31,
1934                                 q31, q31_base, pkt31);
1935                 result += r30 + r31;
1936         }
1937
1938         /*
1939          * Drain the pipeline (exactly 6 packets).
1940          * Handle the last packet in the case
1941          * of an odd number of input packets.
1942          */
1943         pkt_last = pkts[n_pkts - 1];
1944         rte_prefetch0(pkt_last);
1945
1946         subport00 = rte_sched_port_subport(port, pkt00);
1947         subport01 = rte_sched_port_subport(port, pkt01);
1948         q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
1949                         pkt00, subport_qmask);
1950         q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
1951                         pkt01, subport_qmask);
1952
1953         q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
1954         q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
1955         rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
1956         rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
1957
1958         r20 = rte_sched_port_enqueue_qwa(port, subport20,
1959                         q20, q20_base, pkt20);
1960         r21 = rte_sched_port_enqueue_qwa(port, subport21,
1961                         q21, q21_base, pkt21);
1962         result += r20 + r21;
1963
1964         subport_last = rte_sched_port_subport(port, pkt_last);
1965         q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
1966                                 pkt_last, subport_qmask);
1967
1968         q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
1969         q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
1970         rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
1971         rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
1972
1973         r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
1974                         q10_base, pkt10);
1975         r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
1976                         q11_base, pkt11);
1977         result += r10 + r11;
1978
1979         q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
1980         rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
1981                 q_last, q_last_base);
1982
1983         r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
1984                         q00_base, pkt00);
1985         r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
1986                         q01_base, pkt01);
1987         result += r00 + r01;
1988
1989         if (n_pkts & 1) {
1990                 r_last = rte_sched_port_enqueue_qwa(port, subport_last,
1991                                         q_last, q_last_base, pkt_last);
1992                 result += r_last;
1993         }
1994
1995         return result;
1996 }
1997
1998 #ifndef RTE_SCHED_SUBPORT_TC_OV
1999
2000 static inline void
2001 grinder_credits_update(struct rte_sched_port *port,
2002         struct rte_sched_subport *subport, uint32_t pos)
2003 {
2004         struct rte_sched_grinder *grinder = subport->grinder + pos;
2005         struct rte_sched_pipe *pipe = grinder->pipe;
2006         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2007         uint64_t n_periods;
2008         uint32_t i;
2009
2010         /* Subport TB */
2011         n_periods = (port->time - subport->tb_time) / subport->tb_period;
2012         subport->tb_credits += n_periods * subport->tb_credits_per_period;
2013         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
2014         subport->tb_time += n_periods * subport->tb_period;
2015
2016         /* Pipe TB */
2017         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2018         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2019         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
2020         pipe->tb_time += n_periods * params->tb_period;
2021
2022         /* Subport TCs */
2023         if (unlikely(port->time >= subport->tc_time)) {
2024                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2025                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
2026
2027                 subport->tc_time = port->time + subport->tc_period;
2028         }
2029
2030         /* Pipe TCs */
2031         if (unlikely(port->time >= pipe->tc_time)) {
2032                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2033                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2034
2035                 pipe->tc_time = port->time + params->tc_period;
2036         }
2037 }
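
/*
 * Refill sketch (assumed figures): with tb_period = 10 and
 * tb_credits_per_period = 25, an elapsed port->time - tb_time of 47
 * time units gives n_periods = 4, adds 4 * 25 = 100 credits (saturated
 * at tb_size) and advances tb_time by 40, so the remainder of 7 time
 * units carries over into the next update.
 */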
2038
2039 #else
2040
2041 static inline uint32_t
2042 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2043         struct rte_sched_subport *subport)
2044 {
2045         uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2046         uint32_t tc_consumption = 0, tc_ov_consumption_max;
2047         uint32_t tc_ov_wm = subport->tc_ov_wm;
2048         uint32_t i;
2049
2050         if (subport->tc_ov == 0)
2051                 return subport->tc_ov_wm_max;
2052
2053         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2054                 tc_ov_consumption[i] =
2055                         subport->tc_credits_per_period[i] - subport->tc_credits[i];
2056                 tc_consumption += tc_ov_consumption[i];
2057         }
2058
2059         tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2060                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2061                 subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2062
2063         tc_ov_consumption_max =
2064                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2065                         tc_consumption;
2066
2067         if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2068                 (tc_ov_consumption_max - port->mtu)) {
2069                 tc_ov_wm  -= tc_ov_wm >> 7;
2070                 if (tc_ov_wm < subport->tc_ov_wm_min)
2071                         tc_ov_wm = subport->tc_ov_wm_min;
2072
2073                 return tc_ov_wm;
2074         }
2075
2076         tc_ov_wm += (tc_ov_wm >> 7) + 1;
2077         if (tc_ov_wm > subport->tc_ov_wm_max)
2078                 tc_ov_wm = subport->tc_ov_wm_max;
2079
2080         return tc_ov_wm;
2081 }
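
/*
 * Adaptation sketch (assumed figures): when best-effort traffic
 * consumes nearly all of the bandwidth left over by the higher
 * priority classes, the watermark decays by 1/128 per tc_period
 * (floored at tc_ov_wm_min); otherwise it grows by 1/128 plus one byte
 * (capped at tc_ov_wm_max). E.g. tc_ov_wm = 12800 decays to 12700 or
 * grows to 12901 in a single period.
 */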
2082
2083 static inline void
2084 grinder_credits_update(struct rte_sched_port *port,
2085         struct rte_sched_subport *subport, uint32_t pos)
2086 {
2087         struct rte_sched_grinder *grinder = subport->grinder + pos;
2088         struct rte_sched_pipe *pipe = grinder->pipe;
2089         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2090         uint64_t n_periods;
2091         uint32_t i;
2092
2093         /* Subport TB */
2094         n_periods = (port->time - subport->tb_time) / subport->tb_period;
2095         subport->tb_credits += n_periods * subport->tb_credits_per_period;
2096         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
2097         subport->tb_time += n_periods * subport->tb_period;
2098
2099         /* Pipe TB */
2100         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2101         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2102         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
2103         pipe->tb_time += n_periods * params->tb_period;
2104
2105         /* Subport TCs */
2106         if (unlikely(port->time >= subport->tc_time)) {
2107                 subport->tc_ov_wm = grinder_tc_ov_credits_update(port, subport);
2108
2109                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2110                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
2111
2112                 subport->tc_time = port->time + subport->tc_period;
2113                 subport->tc_ov_period_id++;
2114         }
2115
2116         /* Pipe TCs */
2117         if (unlikely(port->time >= pipe->tc_time)) {
2118                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2119                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2120                 pipe->tc_time = port->time + params->tc_period;
2121         }
2122
2123         /* Pipe TCs - Oversubscription */
2124         if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2125                 pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2126
2127                 pipe->tc_ov_period_id = subport->tc_ov_period_id;
2128         }
2129 }
2130
2131 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2132
2133
2134 #ifndef RTE_SCHED_SUBPORT_TC_OV
2135
2136 static inline int
2137 grinder_credits_check(struct rte_sched_port *port,
2138         struct rte_sched_subport *subport, uint32_t pos)
2139 {
2140         struct rte_sched_grinder *grinder = subport->grinder + pos;
2141         struct rte_sched_pipe *pipe = grinder->pipe;
2142         struct rte_mbuf *pkt = grinder->pkt;
2143         uint32_t tc_index = grinder->tc_index;
2144         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2145         uint32_t subport_tb_credits = subport->tb_credits;
2146         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2147         uint32_t pipe_tb_credits = pipe->tb_credits;
2148         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2149         int enough_credits;
2150
2151         /* Check pipe and subport credits */
2152         enough_credits = (pkt_len <= subport_tb_credits) &&
2153                 (pkt_len <= subport_tc_credits) &&
2154                 (pkt_len <= pipe_tb_credits) &&
2155                 (pkt_len <= pipe_tc_credits);
2156
2157         if (!enough_credits)
2158                 return 0;
2159
2160         /* Update pipe and subport credits */
2161         subport->tb_credits -= pkt_len;
2162         subport->tc_credits[tc_index] -= pkt_len;
2163         pipe->tb_credits -= pkt_len;
2164         pipe->tc_credits[tc_index] -= pkt_len;
2165
2166         return 1;
2167 }
2168
2169 #else
2170
2171 static inline int
2172 grinder_credits_check(struct rte_sched_port *port,
2173         struct rte_sched_subport *subport, uint32_t pos)
2174 {
2175         struct rte_sched_grinder *grinder = subport->grinder + pos;
2176         struct rte_sched_pipe *pipe = grinder->pipe;
2177         struct rte_mbuf *pkt = grinder->pkt;
2178         uint32_t tc_index = grinder->tc_index;
2179         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2180         uint32_t subport_tb_credits = subport->tb_credits;
2181         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2182         uint32_t pipe_tb_credits = pipe->tb_credits;
2183         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2184         uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2185         uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2186         uint32_t pipe_tc_ov_credits, i;
2187         int enough_credits;
2188
2189         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2190                 pipe_tc_ov_mask1[i] = UINT32_MAX;
2191
2192         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2193         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX;
2194         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2195
2196         /* Check pipe and subport credits */
2197         enough_credits = (pkt_len <= subport_tb_credits) &&
2198                 (pkt_len <= subport_tc_credits) &&
2199                 (pkt_len <= pipe_tb_credits) &&
2200                 (pkt_len <= pipe_tc_credits) &&
2201                 (pkt_len <= pipe_tc_ov_credits);
2202
2203         if (!enough_credits)
2204                 return 0;
2205
2206         /* Update pipe and subport credits */
2207         subport->tb_credits -= pkt_len;
2208         subport->tc_credits[tc_index] -= pkt_len;
2209         pipe->tb_credits -= pkt_len;
2210         pipe->tc_credits[tc_index] -= pkt_len;
2211         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2212
2213         return 1;
2214 }
2215
2216 #endif /* RTE_SCHED_SUBPORT_TC_OV */
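
/*
 * Note on the masking in the oversubscription-enabled check above:
 * pipe_tc_ov_mask1[] holds UINT32_MAX for every traffic class except
 * best-effort, so the tc_ov credit comparison always passes for the
 * high-priority classes, while pipe_tc_ov_mask2[] is non-zero only for
 * best-effort, so tc_ov_credits is decremented branchlessly for
 * best-effort packets only.
 */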
2217
2218
2219 static inline int
2220 grinder_schedule(struct rte_sched_port *port,
2221         struct rte_sched_subport *subport, uint32_t pos)
2222 {
2223         struct rte_sched_grinder *grinder = subport->grinder + pos;
2224         struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2225         struct rte_mbuf *pkt = grinder->pkt;
2226         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2227         uint32_t be_tc_active;
2228
2229         if (!grinder_credits_check(port, subport, pos))
2230                 return 0;
2231
2232         /* Advance port time */
2233         port->time += pkt_len;
2234
2235         /* Send packet */
2236         port->pkts_out[port->n_pkts_out++] = pkt;
2237         queue->qr++;
2238
2239         be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2240         grinder->wrr_tokens[grinder->qpos] +=
2241                 (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2242
2243         if (queue->qr == queue->qw) {
2244                 uint32_t qindex = grinder->qindex[grinder->qpos];
2245
2246                 rte_bitmap_clear(subport->bmp, qindex);
2247                 grinder->qmask &= ~(1 << grinder->qpos);
2248                 if (be_tc_active)
2249                         grinder->wrr_mask[grinder->qpos] = 0;
2250                 rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
2251         }
2252
2253         /* Reset pipe loop detection */
2254         subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2255         grinder->productive = 1;
2256
2257         return 1;
2258 }
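
/*
 * Note: be_tc_active above is an all-ones/all-zeros mask, so the WRR
 * token charge (pkt_len * wrr_cost) is applied branchlessly and only
 * when the best-effort traffic class is being scheduled.
 */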
2259
2260 #ifdef SCHED_VECTOR_SSE4
2261
2262 static inline int
2263 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2264 {
2265         __m128i index = _mm_set1_epi32(base_pipe);
2266         __m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
2267         __m128i res = _mm_cmpeq_epi32(pipes, index);
2268
2269         pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
2270         pipes = _mm_cmpeq_epi32(pipes, index);
2271         res = _mm_or_si128(res, pipes);
2272
2273         if (_mm_testz_si128(res, res))
2274                 return 0;
2275
2276         return 1;
2277 }
2278
2279 #elif defined(SCHED_VECTOR_NEON)
2280
2281 static inline int
2282 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2283 {
2284         uint32x4_t index, pipes;
2285         uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;
2286
2287         index = vmovq_n_u32(base_pipe);
2288         pipes = vld1q_u32(pos);
2289         if (!vminvq_u32(veorq_u32(pipes, index)))
2290                 return 1;
2291
2292         pipes = vld1q_u32(pos + 4);
2293         if (!vminvq_u32(veorq_u32(pipes, index)))
2294                 return 1;
2295
2296         return 0;
2297 }
2298
2299 #else
2300
2301 static inline int
2302 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2303 {
2304         uint32_t i;
2305
2306         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2307                 if (subport->grinder_base_bmp_pos[i] == base_pipe)
2308                         return 1;
2309         }
2310
2311         return 0;
2312 }
2313
2314 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2315
2316 static inline void
2317 grinder_pcache_populate(struct rte_sched_subport *subport,
2318         uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2319 {
2320         struct rte_sched_grinder *grinder = subport->grinder + pos;
2321         uint16_t w[4];
2322
2323         grinder->pcache_w = 0;
2324         grinder->pcache_r = 0;
2325
2326         w[0] = (uint16_t) bmp_slab;
2327         w[1] = (uint16_t) (bmp_slab >> 16);
2328         w[2] = (uint16_t) (bmp_slab >> 32);
2329         w[3] = (uint16_t) (bmp_slab >> 48);
2330
2331         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2332         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2333         grinder->pcache_w += (w[0] != 0);
2334
2335         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2336         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2337         grinder->pcache_w += (w[1] != 0);
2338
2339         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2340         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2341         grinder->pcache_w += (w[2] != 0);
2342
2343         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2344         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2345         grinder->pcache_w += (w[3] != 0);
2346 }
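
/*
 * Example (illustrative): a slab of 0x0000000000030001 at bmp_pos = 128
 * yields two cache entries: qmask 0x0001 at queue index 128 (pipe 8)
 * and qmask 0x0003 at queue index 144 (pipe 9); the two all-zero
 * 16-bit words are skipped.
 */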
2347
2348 static inline void
2349 grinder_tccache_populate(struct rte_sched_subport *subport,
2350         uint32_t pos, uint32_t qindex, uint16_t qmask)
2351 {
2352         struct rte_sched_grinder *grinder = subport->grinder + pos;
2353         uint8_t b, i;
2354
2355         grinder->tccache_w = 0;
2356         grinder->tccache_r = 0;
2357
2358         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2359                 b = (uint8_t) ((qmask >> i) & 0x1);
2360                 grinder->tccache_qmask[grinder->tccache_w] = b;
2361                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2362                 grinder->tccache_w += (b != 0);
2363         }
2364
2365         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2366         grinder->tccache_qmask[grinder->tccache_w] = b;
2367         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2368                 RTE_SCHED_TRAFFIC_CLASS_BE;
2369         grinder->tccache_w += (b != 0);
2370 }
2371
2372 static inline int
2373 grinder_next_tc(struct rte_sched_port *port,
2374         struct rte_sched_subport *subport, uint32_t pos)
2375 {
2376         struct rte_sched_grinder *grinder = subport->grinder + pos;
2377         struct rte_mbuf **qbase;
2378         uint32_t qindex;
2379         uint16_t qsize;
2380
2381         if (grinder->tccache_r == grinder->tccache_w)
2382                 return 0;
2383
2384         qindex = grinder->tccache_qindex[grinder->tccache_r];
2385         qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2386         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2387
2388         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2389         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2390         grinder->qsize = qsize;
2391
2392         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2393                 grinder->queue[0] = subport->queue + qindex;
2394                 grinder->qbase[0] = qbase;
2395                 grinder->qindex[0] = qindex;
2396                 grinder->tccache_r++;
2397
2398                 return 1;
2399         }
2400
2401         grinder->queue[0] = subport->queue + qindex;
2402         grinder->queue[1] = subport->queue + qindex + 1;
2403         grinder->queue[2] = subport->queue + qindex + 2;
2404         grinder->queue[3] = subport->queue + qindex + 3;
2405
2406         grinder->qbase[0] = qbase;
2407         grinder->qbase[1] = qbase + qsize;
2408         grinder->qbase[2] = qbase + 2 * qsize;
2409         grinder->qbase[3] = qbase + 3 * qsize;
2410
2411         grinder->qindex[0] = qindex;
2412         grinder->qindex[1] = qindex + 1;
2413         grinder->qindex[2] = qindex + 2;
2414         grinder->qindex[3] = qindex + 3;
2415
2416         grinder->tccache_r++;
2417         return 1;
2418 }
2419
2420 static inline int
2421 grinder_next_pipe(struct rte_sched_port *port,
2422         struct rte_sched_subport *subport, uint32_t pos)
2423 {
2424         struct rte_sched_grinder *grinder = subport->grinder + pos;
2425         uint32_t pipe_qindex;
2426         uint16_t pipe_qmask;
2427
2428         if (grinder->pcache_r < grinder->pcache_w) {
2429                 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2430                 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2431                 grinder->pcache_r++;
2432         } else {
2433                 uint64_t bmp_slab = 0;
2434                 uint32_t bmp_pos = 0;
2435
2436                 /* Get another non-empty pipe group */
2437                 if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2438                         return 0;
2439
2440 #ifdef RTE_SCHED_DEBUG
2441                 debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2442 #endif
2443
2444                 /* Return if pipe group already in one of the other grinders */
2445                 subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2446                 if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2447                         return 0;
2448
2449                 subport->grinder_base_bmp_pos[pos] = bmp_pos;
2450
2451                 /* Install new pipe group into grinder's pipe cache */
2452                 grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2453
2454                 pipe_qmask = grinder->pcache_qmask[0];
2455                 pipe_qindex = grinder->pcache_qindex[0];
2456                 grinder->pcache_r = 1;
2457         }
2458
2459         /* Install new pipe in the grinder */
2460         grinder->pindex = pipe_qindex >> 4;
2461         grinder->subport = subport;
2462         grinder->pipe = subport->pipe + grinder->pindex;
2463         grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2464         grinder->productive = 0;
2465
2466         grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2467         grinder_next_tc(port, subport, pos);
2468
2469         /* Check for pipe exhaustion */
2470         if (grinder->pindex == subport->pipe_loop) {
2471                 subport->pipe_exhaustion = 1;
2472                 subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2473         }
2474
2475         return 1;
2476 }
2477
2478
2479 static inline void
2480 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2481 {
2482         struct rte_sched_grinder *grinder = subport->grinder + pos;
2483         struct rte_sched_pipe *pipe = grinder->pipe;
2484         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2485         uint32_t qmask = grinder->qmask;
2486
2487         grinder->wrr_tokens[0] =
2488                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2489         grinder->wrr_tokens[1] =
2490                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2491         grinder->wrr_tokens[2] =
2492                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2493         grinder->wrr_tokens[3] =
2494                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2495
2496         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2497         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2498         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2499         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2500
2501         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2502         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2503         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2504         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2505 }
2506
2507 static inline void
2508 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2509 {
2510         struct rte_sched_grinder *grinder = subport->grinder + pos;
2511         struct rte_sched_pipe *pipe = grinder->pipe;
2512
2513         pipe->wrr_tokens[0] =
2514                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2515                                 RTE_SCHED_WRR_SHIFT;
2516         pipe->wrr_tokens[1] =
2517                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2518                                 RTE_SCHED_WRR_SHIFT;
2519         pipe->wrr_tokens[2] =
2520                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2521                                 RTE_SCHED_WRR_SHIFT;
2522         pipe->wrr_tokens[3] =
2523                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2524                                 RTE_SCHED_WRR_SHIFT;
2525 }
2526
2527 static inline void
2528 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2529 {
2530         struct rte_sched_grinder *grinder = subport->grinder + pos;
2531         uint16_t wrr_tokens_min;
2532
2533         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2534         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2535         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2536         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2537
2538         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2539         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2540
2541         grinder->wrr_tokens[0] -= wrr_tokens_min;
2542         grinder->wrr_tokens[1] -= wrr_tokens_min;
2543         grinder->wrr_tokens[2] -= wrr_tokens_min;
2544         grinder->wrr_tokens[3] -= wrr_tokens_min;
2545 }
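
/*
 * Example (illustrative): with wrr_tokens = {40, 10, 30, 20} and all
 * four best-effort queues active, queue 1 (minimum token count) is
 * selected and the tokens become {30, 0, 20, 10}; inactive queues are
 * first forced to 0xFFFF through wrr_mask so they can never win.
 */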
2546
2547
2548 #define grinder_evict(subport, pos)
2549
2550 static inline void
2551 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2552 {
2553         struct rte_sched_grinder *grinder = subport->grinder + pos;
2554
2555         rte_prefetch0(grinder->pipe);
2556         rte_prefetch0(grinder->queue[0]);
2557 }
2558
2559 static inline void
2560 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2561 {
2562         struct rte_sched_grinder *grinder = subport->grinder + pos;
2563         uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2564
2565         qsize = grinder->qsize;
2566         grinder->qpos = 0;
2567
2568         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2569                 qr[0] = grinder->queue[0]->qr & (qsize - 1);
2570
2571                 rte_prefetch0(grinder->qbase[0] + qr[0]);
2572                 return;
2573         }
2574
2575         qr[0] = grinder->queue[0]->qr & (qsize - 1);
2576         qr[1] = grinder->queue[1]->qr & (qsize - 1);
2577         qr[2] = grinder->queue[2]->qr & (qsize - 1);
2578         qr[3] = grinder->queue[3]->qr & (qsize - 1);
2579
2580         rte_prefetch0(grinder->qbase[0] + qr[0]);
2581         rte_prefetch0(grinder->qbase[1] + qr[1]);
2582
2583         grinder_wrr_load(subport, pos);
2584         grinder_wrr(subport, pos);
2585
2586         rte_prefetch0(grinder->qbase[2] + qr[2]);
2587         rte_prefetch0(grinder->qbase[3] + qr[3]);
2588 }
2589
2590 static inline void
2591 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2592 {
2593         struct rte_sched_grinder *grinder = subport->grinder + pos;
2594         uint32_t qpos = grinder->qpos;
2595         struct rte_mbuf **qbase = grinder->qbase[qpos];
2596         uint16_t qsize = grinder->qsize;
2597         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2598
2599         grinder->pkt = qbase[qr];
2600         rte_prefetch0(grinder->pkt);
2601
2602         if (unlikely((qr & 0x7) == 7)) {
2603                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2604
2605                 rte_prefetch0(qbase + qr_next);
2606         }
2607 }
2608
2609 static inline uint32_t
2610 grinder_handle(struct rte_sched_port *port,
2611         struct rte_sched_subport *subport, uint32_t pos)
2612 {
2613         struct rte_sched_grinder *grinder = subport->grinder + pos;
2614
2615         switch (grinder->state) {
2616         case e_GRINDER_PREFETCH_PIPE:
2617         {
2618                 if (grinder_next_pipe(port, subport, pos)) {
2619                         grinder_prefetch_pipe(subport, pos);
2620                         subport->busy_grinders++;
2621
2622                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2623                         return 0;
2624                 }
2625
2626                 return 0;
2627         }
2628
2629         case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2630         {
2631                 struct rte_sched_pipe *pipe = grinder->pipe;
2632
2633                 grinder->pipe_params = subport->pipe_profiles + pipe->profile;
2634                 grinder_prefetch_tc_queue_arrays(subport, pos);
2635                 grinder_credits_update(port, subport, pos);
2636
2637                 grinder->state = e_GRINDER_PREFETCH_MBUF;
2638                 return 0;
2639         }
2640
2641         case e_GRINDER_PREFETCH_MBUF:
2642         {
2643                 grinder_prefetch_mbuf(subport, pos);
2644
2645                 grinder->state = e_GRINDER_READ_MBUF;
2646                 return 0;
2647         }
2648
2649         case e_GRINDER_READ_MBUF:
2650         {
2651                 uint32_t wrr_active, result = 0;
2652
2653                 result = grinder_schedule(port, subport, pos);
2654
2655                 wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2656
2657                 /* Look for next packet within the same TC */
2658                 if (result && grinder->qmask) {
2659                         if (wrr_active)
2660                                 grinder_wrr(subport, pos);
2661
2662                         grinder_prefetch_mbuf(subport, pos);
2663
2664                         return 1;
2665                 }
2666
2667                 if (wrr_active)
2668                         grinder_wrr_store(subport, pos);
2669
2670                 /* Look for another active TC within same pipe */
2671                 if (grinder_next_tc(port, subport, pos)) {
2672                         grinder_prefetch_tc_queue_arrays(subport, pos);
2673
2674                         grinder->state = e_GRINDER_PREFETCH_MBUF;
2675                         return result;
2676                 }
2677
2678                 if (grinder->productive == 0 &&
2679                     subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
2680                         subport->pipe_loop = grinder->pindex;
2681
2682                 grinder_evict(subport, pos);
2683
2684                 /* Look for another active pipe */
2685                 if (grinder_next_pipe(port, subport, pos)) {
2686                         grinder_prefetch_pipe(subport, pos);
2687
2688                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2689                         return result;
2690                 }
2691
2692                 /* No active pipe found */
2693                 subport->busy_grinders--;
2694
2695                 grinder->state = e_GRINDER_PREFETCH_PIPE;
2696                 return result;
2697         }
2698
2699         default:
2700                 rte_panic("Algorithmic error (invalid state)\n");
2701                 return 0;
2702         }
2703 }
2704
2705 static inline void
2706 rte_sched_port_time_resync(struct rte_sched_port *port)
2707 {
2708         uint64_t cycles = rte_get_tsc_cycles();
2709         uint64_t cycles_diff = cycles - port->time_cpu_cycles;
2710         uint64_t bytes_diff;
2711         uint32_t i;
2712
2713         /* Compute elapsed time in bytes */
2714         bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2715                                            port->inv_cycles_per_byte);
2716
2717         /* Advance port time */
2718         port->time_cpu_cycles = cycles;
2719         port->time_cpu_bytes += bytes_diff;
2720         if (port->time < port->time_cpu_bytes)
2721                 port->time = port->time_cpu_bytes;
2722
2723         /* Reset pipe loop detection */
2724         for (i = 0; i < port->n_subports_per_port; i++)
2725                 port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2726 }
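
/*
 * Conversion sketch (assumed figures): with a 2 GHz TSC and a 10 Gbps
 * port rate (1.25e9 bytes/s), cycles_per_byte = 1.6, scaled by
 * 2^RTE_SCHED_TIME_SHIFT = 256 to ~410 when the reciprocal is
 * initialized, so bytes_diff ~= (cycles_diff << 8) / 410.
 */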
2727
2728 static inline int
2729 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2730 {
2731         int exceptions;
2732
2733         /* Check if any exception flag is set */
2734         exceptions = (second_pass && subport->busy_grinders == 0) ||
2735                 (subport->pipe_exhaustion == 1);
2736
2737         /* Clear exception flags */
2738         subport->pipe_exhaustion = 0;
2739
2740         return exceptions;
2741 }
2742
2743 int
2744 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
2745 {
2746         struct rte_sched_subport *subport;
2747         uint32_t subport_id = port->subport_id;
2748         uint32_t i, n_subports = 0, count;
2749
2750         port->pkts_out = pkts;
2751         port->n_pkts_out = 0;
2752
2753         rte_sched_port_time_resync(port);
2754
2755         /* Take each queue in the grinder one step further */
2756         for (i = 0, count = 0; ; i++) {
2757                 subport = port->subports[subport_id];
2758
2759                 count += grinder_handle(port, subport,
2760                                 i & (RTE_SCHED_PORT_N_GRINDERS - 1));
2761
2762                 if (count == n_pkts) {
2763                         subport_id++;
2764
2765                         if (subport_id == port->n_subports_per_port)
2766                                 subport_id = 0;
2767
2768                         port->subport_id = subport_id;
2769                         break;
2770                 }
2771
2772                 if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
2773                         i = 0;
2774                         subport_id++;
2775                         n_subports++;
2776                 }
2777
2778                 if (subport_id == port->n_subports_per_port)
2779                         subport_id = 0;
2780
2781                 if (n_subports == port->n_subports_per_port) {
2782                         port->subport_id = subport_id;
2783                         break;
2784                 }
2785         }
2786
2787         return count;
2788 }
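
/*
 * Usage sketch (illustrative; assumes a run-to-completion datapath and
 * that rx/tx plumbing and classification exist elsewhere):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t n;
 *
 *	n = rte_eth_rx_burst(port_id, rx_queue, pkts, 32);
 *	(classify and rte_sched_port_pkt_write() each mbuf here)
 *	rte_sched_port_enqueue(sched_port, pkts, n);
 *	n = rte_sched_port_dequeue(sched_port, pkts, 32);
 *	rte_eth_tx_burst(port_id, tx_queue, pkts, n);
 */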