sched: update memory compute to support flexibility
[dpdk.git] / lib / librte_sched / rte_sched.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_bitmap.h>
#include <rte_reciprocal.h>

#include "rte_sched.h"
#include "rte_sched_common.h"
#include "rte_approx.h"

#ifdef __INTEL_COMPILER
#pragma warning(disable:2259) /* conversion may lose significant bits */
#endif

#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>

#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
#elif defined(RTE_MACHINE_CPUFLAG_NEON)
#define SCHED_VECTOR_NEON
#endif

#endif

#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
#define RTE_SCHED_WRR_SHIFT                   3
#define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

/* Scaling for cycles_per_byte calculation
 * Chosen so that minimum rate is 480 bit/sec
 */
#define RTE_SCHED_TIME_SHIFT                  8

struct rte_sched_pipe_profile {
        /* Token bucket (TB) */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;

        /* Pipe traffic classes */
        uint32_t tc_period;
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t tc_ov_weight;

        /* Pipe best-effort traffic class queues */
        uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_pipe {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_credits;

        /* Pipe profile and flags */
        uint32_t profile;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /* Weighted Round Robin (WRR) */
        uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];

        /* TC oversubscription */
        uint32_t tc_ov_credits;
        uint8_t tc_ov_period_id;
} __rte_cache_aligned;

struct rte_sched_queue {
        uint16_t qw;
        uint16_t qr;
};

struct rte_sched_queue_extra {
        struct rte_sched_queue_stats stats;
#ifdef RTE_SCHED_RED
        struct rte_red red;
#endif
};

enum grinder_state {
        e_GRINDER_PREFETCH_PIPE = 0,
        e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
        e_GRINDER_PREFETCH_MBUF,
        e_GRINDER_READ_MBUF
};

struct rte_sched_grinder {
        /* Pipe cache */
        uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
        uint32_t pcache_w;
        uint32_t pcache_r;

        /* Current pipe */
        enum grinder_state state;
        uint32_t productive;
        uint32_t pindex;
        struct rte_sched_subport *subport;
        struct rte_sched_pipe *pipe;
        struct rte_sched_pipe_profile *pipe_params;

        /* TC cache */
        uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tccache_w;
        uint32_t tccache_r;

        /* Current TC */
        uint32_t tc_index;
        struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
        struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
        uint16_t qsize;
        uint32_t qmask;
        uint32_t qpos;
        struct rte_mbuf *pkt;

        /* WRR */
        uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
        uint32_t tb_period;
        uint32_t tb_credits_per_period;
        uint32_t tb_size;
        uint32_t tb_credits;

        /* Traffic classes (TCs) */
        uint64_t tc_time; /* time of next update */
        uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t tc_period;

        /* TC oversubscription */
        uint32_t tc_ov_wm;
        uint32_t tc_ov_wm_min;
        uint32_t tc_ov_wm_max;
        uint8_t tc_ov_period_id;
        uint8_t tc_ov;
        uint32_t tc_ov_n;
        double tc_ov_rate;

        /* Statistics */
        struct rte_sched_subport_stats stats;

        /* Subport pipes */
        uint32_t n_pipes_per_subport_enabled;
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;

        /* Pipe best-effort TC rate */
        uint32_t pipe_tc_be_rate_max;

        /* Pipe queues size */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

struct rte_sched_port {
        /* User parameters */
        uint32_t n_subports_per_port;
        uint32_t n_pipes_per_subport;
        uint32_t n_pipes_per_subport_log2;
        uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
        uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t rate;
        uint32_t mtu;
        uint32_t frame_overhead;
        int socket;
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        uint32_t n_pipe_profiles;
        uint32_t n_max_pipe_profiles;
        uint32_t pipe_tc_be_rate_max;
#ifdef RTE_SCHED_RED
        struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif

        /* Timing */
        uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
        uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
        uint64_t time;                /* Current NIC TX time measured in bytes */
        struct rte_reciprocal inv_cycles_per_byte; /* Inverse of CPU cycles per byte */

        /* Scheduling loop detection */
        uint32_t pipe_loop;
        uint32_t pipe_exhaustion;

        /* Bitmap */
        struct rte_bitmap *bmp;
        uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;

        /* Grinders */
        struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
        uint32_t busy_grinders;
        struct rte_mbuf **pkts_out;
        uint32_t n_pkts_out;

        /* Queue base calculation */
        uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
        uint32_t qsize_sum;

        /* Large data structures */
        struct rte_sched_subport *subports[0];
        struct rte_sched_subport *subport;
        struct rte_sched_pipe *pipe;
        struct rte_sched_queue *queue;
        struct rte_sched_queue_extra *queue_extra;
        struct rte_sched_pipe_profile *pipe_profiles;
        uint8_t *bmp_array;
        struct rte_mbuf **queue_array;
        uint8_t memory[0] __rte_cache_aligned;
} __rte_cache_aligned;

enum rte_sched_port_array {
        e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
        e_RTE_SCHED_PORT_ARRAY_PIPE,
        e_RTE_SCHED_PORT_ARRAY_QUEUE,
        e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_PORT_ARRAY_TOTAL,
};

enum rte_sched_subport_array {
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
        e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
        e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
};

static inline uint32_t
rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
{
        return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
}

static inline struct rte_mbuf **
rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t pindex = qindex >> 4;
        uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);

        return (subport->queue_array + pindex *
                subport->qsize_sum + subport->qsize_add[qpos]);
}

static inline uint16_t
rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
        struct rte_sched_subport *subport, uint32_t qindex)
{
        uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return subport->qsize[tc];
}
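
/*
 * Illustration (not compiled): how a subport-relative queue index decomposes,
 * assuming RTE_SCHED_QUEUES_PER_PIPE == 16. For qindex = 37:
 *
 *     pindex = 37 >> 4  = 2;   pipe #2 within the subport
 *     qpos   = 37 & 0xF = 5;   queue #5 within that pipe
 *
 * The mbuf ring for this queue then starts at
 * queue_array + 2 * qsize_sum + qsize_add[5].
 */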

static inline uint32_t
rte_sched_port_queues_per_port(struct rte_sched_port *port)
{
        return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port;
}

static inline struct rte_mbuf **
rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
{
        uint32_t pindex = qindex >> 4;
        uint32_t qpos = qindex & 0xF;

        return (port->queue_array + pindex *
                port->qsize_sum + port->qsize_add[qpos]);
}

static inline uint16_t
rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
{
        uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return port->qsize[tc];
}

static inline uint16_t
rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
{
        uint16_t pipe_queue = port->pipe_queue[traffic_class];

        return pipe_queue;
}

static inline uint8_t
rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return pipe_tc;
}

static inline uint8_t
rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
{
        uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];

        return tc_queue;
}

static int
pipe_profile_check(struct rte_sched_pipe_params *params,
        uint32_t rate, uint16_t *qsize)
{
        uint32_t i;

        /* Pipe parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* TB rate: non-zero, not greater than port rate */
        if (params->tb_rate == 0 ||
                params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        /* TB size: non-zero */
        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* TC rate: non-zero if qsize non-zero, less than pipe rate */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
                        (qsize[i] != 0 && (params->tc_rate[i] == 0 ||
                        params->tc_rate[i] > params->tb_rate))) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize or tc_rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for be traffic class rate\n", __func__);
                return -EINVAL;
        }

        /* TC period: non-zero */
        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* Best effort tc oversubscription weight: non-zero */
        if (params->tc_ov_weight == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc ov weight\n", __func__);
                return -EINVAL;
        }

        /* Queue WRR weights: non-zero */
        for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
                if (params->wrr_weights[i] == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for wrr weight\n", __func__);
                        return -EINVAL;
                }
        }

        return 0;
}

static int
rte_sched_port_check_params(struct rte_sched_port_params *params)
{
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        /* socket */
        if (params->socket < 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for socket id\n", __func__);
                return -EINVAL;
        }

        /* rate */
        if (params->rate == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for rate\n", __func__);
                return -EINVAL;
        }

        /* mtu */
        if (params->mtu == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for mtu\n", __func__);
                return -EINVAL;
        }

        /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
        if (params->n_subports_per_port == 0 ||
            params->n_subports_per_port > 1u << 16 ||
            !rte_is_power_of_2(params->n_subports_per_port)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for number of subports\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport == 0 ||
            !rte_is_power_of_2(params->n_pipes_per_subport)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for maximum pipes number\n", __func__);
                return -EINVAL;
        }

        return 0;
}

static uint32_t
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
        enum rte_sched_subport_array array)
{
        uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
        uint32_t n_subport_pipe_queues =
                RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;

        uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
        uint32_t size_queue =
                n_subport_pipe_queues * sizeof(struct rte_sched_queue);
        uint32_t size_queue_extra
                = n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
        uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
                sizeof(struct rte_sched_pipe_profile);
        uint32_t size_bmp_array =
                rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        uint32_t size_per_pipe_queue_array, size_queue_array;

        uint32_t base, i;

        size_per_pipe_queue_array = 0;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
                        size_per_pipe_queue_array +=
                                params->qsize[i] * sizeof(struct rte_mbuf *);
                else
                        size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
                                params->qsize[i] * sizeof(struct rte_mbuf *);
        }
        size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;

        base = 0;

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);

        if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
                return base;
        base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);

        return base;
}

static void
rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
{
        uint32_t i;

        subport->qsize_add[0] = 0;

        /* Strict priority traffic classes */
        for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];

        /* Best-effort traffic class */
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
        subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
                subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];

        subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
                subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
}
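
/*
 * Worked example (illustrative values): with qsize[] = 64 for each of the
 * twelve strict-priority TCs and qsize[RTE_SCHED_TRAFFIC_CLASS_BE] = 128,
 * the prefix sums above give qsize_add[0..12] = {0, 64, ..., 768}, the
 * four best-effort queues start at offsets 768, 896, 1024 and 1152, and
 * qsize_sum = 1280 mbuf slots per pipe.
 */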

static void
rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
{
        struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;

        RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u,\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best-effort traffic class oversubscription: weight = %hhu\n"
                "       WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
                i,

                /* Token bucket */
                p->tb_period,
                p->tb_credits_per_period,
                p->tb_size,

                /* Traffic classes */
                p->tc_period,
                p->tc_credits_per_period[0],
                p->tc_credits_per_period[1],
                p->tc_credits_per_period[2],
                p->tc_credits_per_period[3],
                p->tc_credits_per_period[4],
                p->tc_credits_per_period[5],
                p->tc_credits_per_period[6],
                p->tc_credits_per_period[7],
                p->tc_credits_per_period[8],
                p->tc_credits_per_period[9],
                p->tc_credits_per_period[10],
                p->tc_credits_per_period[11],
                p->tc_credits_per_period[12],

                /* Best-effort traffic class oversubscription */
                p->tc_ov_weight,

                /* WRR */
                p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
}

static inline uint64_t
rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
{
        uint64_t time = time_ms;

        time = (time * rate) / 1000;

        return time;
}
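
/*
 * Example (illustrative): for a 10 GbE port, rate = 1250000000 bytes/s, so a
 * tc_period of 10 ms converts to 1250000000 * 10 / 1000 = 12500000 bytes,
 * i.e. the per-period credit budget expressed in bytes on the wire.
 */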

static void
rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
        struct rte_sched_pipe_params *src,
        struct rte_sched_pipe_profile *dst,
        uint32_t rate)
{
        uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
        uint32_t lcd1, lcd2, lcd;
        uint32_t i;

        /* Token Bucket */
        if (src->tb_rate == rate) {
                dst->tb_credits_per_period = 1;
                dst->tb_period = 1;
        } else {
                double tb_rate = (double) src->tb_rate
                                / (double) rate;
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d,
                        &dst->tb_credits_per_period, &dst->tb_period);
        }

        dst->tb_size = src->tb_size;

        /* Traffic Classes */
        dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
                                                rate);

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (subport->qsize[i])
                        dst->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(src->tc_period,
                                        src->tc_rate[i]);

        dst->tc_ov_weight = src->tc_ov_weight;

        /* WRR queues */
        wrr_cost[0] = src->wrr_weights[0];
        wrr_cost[1] = src->wrr_weights[1];
        wrr_cost[2] = src->wrr_weights[2];
        wrr_cost[3] = src->wrr_weights[3];

        lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
        lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
        lcd = rte_get_lcd(lcd1, lcd2);

        wrr_cost[0] = lcd / wrr_cost[0];
        wrr_cost[1] = lcd / wrr_cost[1];
        wrr_cost[2] = lcd / wrr_cost[2];
        wrr_cost[3] = lcd / wrr_cost[3];

        dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
        dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
        dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
        dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
}
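
/*
 * Worked example (illustrative): for wrr_weights = {1, 2, 4, 8} the lowest
 * common denominator computed above is 8, so wrr_cost = {8, 4, 2, 1}.
 * A queue's cost is charged per byte served, so the heavier the weight,
 * the cheaper its bytes, which yields 1:2:4:8 bandwidth sharing among the
 * four best-effort queues.
 */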

static void
rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
        struct rte_sched_subport_params *params, uint32_t rate)
{
        uint32_t i;

        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;

                rte_sched_pipe_profile_convert(subport, src, dst, rate);
                rte_sched_port_log_pipe_profile(subport, i);
        }

        subport->pipe_tc_be_rate_max = 0;
        for (i = 0; i < subport->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *src = params->pipe_profiles + i;
                uint32_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

                if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
                        subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
        }
}

static int
rte_sched_subport_check_params(struct rte_sched_subport_params *params,
        uint32_t n_max_pipes_per_subport,
        uint32_t rate)
{
        uint32_t i;

        /* Check user parameters */
        if (params == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter params\n", __func__);
                return -EINVAL;
        }

        if (params->tb_rate == 0 || params->tb_rate > rate) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb rate\n", __func__);
                return -EINVAL;
        }

        if (params->tb_size == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tb size\n", __func__);
                return -EINVAL;
        }

        /* qsize: if non-zero, power of 2,
         * no bigger than 32K (due to 16-bit read/write pointers)
         */
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint16_t qsize = params->qsize[i];

                if (qsize != 0 && !rte_is_power_of_2(qsize)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for qsize\n", __func__);
                        return -EINVAL;
                }
        }

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t tc_rate = params->tc_rate[i];
                uint16_t qsize = params->qsize[i];

                if ((qsize == 0 && tc_rate != 0) ||
                        (qsize != 0 && tc_rate == 0) ||
                        (tc_rate > params->tb_rate)) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Incorrect value for tc rate\n", __func__);
                        return -EINVAL;
                }
        }

        if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
                params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect qsize or tc rate (best effort)\n", __func__);
                return -EINVAL;
        }

        if (params->tc_period == 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for tc period\n", __func__);
                return -EINVAL;
        }

        /* n_pipes_per_subport: non-zero, power of 2 */
        if (params->n_pipes_per_subport_enabled == 0 ||
                params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
            !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipes number\n", __func__);
                return -EINVAL;
        }

        /* pipe_profiles and n_pipe_profiles */
        if (params->pipe_profiles == NULL ||
            params->n_pipe_profiles == 0 ||
                params->n_max_pipe_profiles == 0 ||
                params->n_pipe_profiles > params->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for pipe profiles\n", __func__);
                return -EINVAL;
        }

        for (i = 0; i < params->n_pipe_profiles; i++) {
                struct rte_sched_pipe_params *p = params->pipe_profiles + i;
                int status;

                status = pipe_profile_check(p, rate, &params->qsize[0]);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile check failed (%d)\n", __func__, status);
                        return -EINVAL;
                }
        }

        return 0;
}

uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
        struct rte_sched_subport_params **subport_params)
{
        uint32_t size0 = 0, size1 = 0, i;
        int status;

        status = rte_sched_port_check_params(port_params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);

                return 0;
        }

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                status = rte_sched_subport_check_params(sp,
                                port_params->n_pipes_per_subport,
                                port_params->rate);
                if (status != 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Port scheduler subport params check failed (%d)\n",
                                __func__, status);

                        return 0;
                }
        }

        size0 = sizeof(struct rte_sched_port);

        for (i = 0; i < port_params->n_subports_per_port; i++) {
                struct rte_sched_subport_params *sp = subport_params[i];

                size1 += rte_sched_subport_get_array_base(sp,
                                        e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
        }

        return size0 + size1;
}
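
/*
 * Usage sketch (hypothetical parameter values):
 *
 *     struct rte_sched_subport_params *sps[] = {&sp0, &sp1};
 *     uint32_t size = rte_sched_port_get_memory_footprint(&pp, sps);
 *
 * The function returns 0 on any parameter error, so callers must treat 0
 * as failure rather than as an empty footprint.
 */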

struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params)
{
        struct rte_sched_port *port = NULL;
        uint32_t size0, size1;
        uint32_t cycles_per_byte;
        uint32_t i, j;
        int status;

        status = rte_sched_port_check_params(params);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Port scheduler params check failed (%d)\n",
                        __func__, status);
                return NULL;
        }

        size0 = sizeof(struct rte_sched_port);
        size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);

        /* Allocate memory to store the data structures */
        port = rte_zmalloc_socket("qos_params", size0 + size1, RTE_CACHE_LINE_SIZE,
                params->socket);
        if (port == NULL) {
                RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);

                return NULL;
        }

        /* User parameters */
        port->n_subports_per_port = params->n_subports_per_port;
        port->n_pipes_per_subport = params->n_pipes_per_subport;
        port->n_pipes_per_subport_log2 =
                        __builtin_ctz(params->n_pipes_per_subport);
        port->socket = params->socket;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                port->pipe_queue[i] = i;

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->pipe_tc[i] = j;

                if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }

        for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
                port->tc_queue[i] = j;

                if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
                        j++;
        }
        port->rate = params->rate;
        port->mtu = params->mtu + params->frame_overhead;
        port->frame_overhead = params->frame_overhead;

        /* Timing */
        port->time_cpu_cycles = rte_get_tsc_cycles();
        port->time_cpu_bytes = 0;
        port->time = 0;

        cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
                / params->rate;
        port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);

        /* Grinders */
        port->pkts_out = NULL;
        port->n_pkts_out = 0;

        return port;
}
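
/*
 * Typical bring-up sequence (sketch; error handling elided, variable names
 * hypothetical):
 *
 *     struct rte_sched_port *port = rte_sched_port_config(&port_params);
 *     for (i = 0; i < port_params.n_subports_per_port; i++)
 *             rte_sched_subport_config(port, i, subport_params[i]);
 *     for (j = 0; j < n_pipes; j++)
 *             rte_sched_pipe_config(port, subport_id, j, profile_id);
 *
 * Note that subport memory is allocated later by rte_sched_subport_config();
 * this function only sizes the port structure plus the subports[] pointer
 * array.
 */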

static inline void
rte_sched_subport_free(struct rte_sched_port *port,
        struct rte_sched_subport *subport)
{
        uint32_t n_subport_pipe_queues;
        uint32_t qindex;

        if (subport == NULL)
                return;

        n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);

        /* Free enqueued mbufs */
        for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
                struct rte_mbuf **mbufs =
                        rte_sched_subport_pipe_qbase(subport, qindex);
                uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
                if (qsize != 0) {
                        struct rte_sched_queue *queue = subport->queue + qindex;
                        uint16_t qr = queue->qr & (qsize - 1);
                        uint16_t qw = queue->qw & (qsize - 1);

                        for (; qr != qw; qr = (qr + 1) & (qsize - 1))
                                rte_pktmbuf_free(mbufs[qr]);
                }
        }

        rte_bitmap_free(subport->bmp);
}

void
rte_sched_port_free(struct rte_sched_port *port)
{
        uint32_t i;

        /* Check user parameters */
        if (port == NULL)
                return;

        for (i = 0; i < port->n_subports_per_port; i++)
                rte_sched_subport_free(port, port->subports[i]);

        rte_free(port);
}

static void
rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
{
        struct rte_sched_subport *s = port->subports[i];

        RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
                "       Token bucket: period = %u, credits per period = %u, size = %u\n"
                "       Traffic classes: period = %u\n"
                "       credits per period = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u]\n"
                "       Best effort traffic class oversubscription: wm min = %u, wm max = %u\n",
                i,

                /* Token bucket */
                s->tb_period,
                s->tb_credits_per_period,
                s->tb_size,

                /* Traffic classes */
                s->tc_period,
                s->tc_credits_per_period[0],
                s->tc_credits_per_period[1],
                s->tc_credits_per_period[2],
                s->tc_credits_per_period[3],
                s->tc_credits_per_period[4],
                s->tc_credits_per_period[5],
                s->tc_credits_per_period[6],
                s->tc_credits_per_period[7],
                s->tc_credits_per_period[8],
                s->tc_credits_per_period[9],
                s->tc_credits_per_period[10],
                s->tc_credits_per_period[11],
                s->tc_credits_per_period[12],

                /* Best effort traffic class oversubscription */
                s->tc_ov_wm_min,
                s->tc_ov_wm_max);
}

static void
rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
{
        uint32_t i;

        for (i = 0; i < n_subports; i++) {
                struct rte_sched_subport *subport = port->subports[i];

                rte_sched_subport_free(port, subport);
        }

        rte_free(port);
}

int
rte_sched_subport_config(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_params *params)
{
        struct rte_sched_subport *s = NULL;
        uint32_t n_subports = subport_id;
        uint32_t n_subport_pipe_queues, i;
        uint32_t size0, size1, bmp_mem_size;
        int status;

        /* Check user parameters */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        status = rte_sched_subport_check_params(params,
                port->n_pipes_per_subport,
                port->rate);
        if (status != 0) {
                RTE_LOG(NOTICE, SCHED,
                        "%s: Subport scheduler params check failed (%d)\n",
                        __func__, status);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Determine the amount of memory to allocate */
        size0 = sizeof(struct rte_sched_subport);
        size1 = rte_sched_subport_get_array_base(params,
                                e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);

        /* Allocate memory to store the data structures */
        s = rte_zmalloc_socket("subport_params", size0 + size1,
                RTE_CACHE_LINE_SIZE, port->socket);
        if (s == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Memory allocation fails\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -ENOMEM;
        }

        n_subports++;

        /* Port */
        port->subports[subport_id] = s;

        /* Token Bucket (TB) */
        if (params->tb_rate == port->rate) {
                s->tb_credits_per_period = 1;
                s->tb_period = 1;
        } else {
                double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
                double d = RTE_SCHED_TB_RATE_CONFIG_ERR;

                rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
        }

        s->tb_size = params->tb_size;
        s->tb_time = port->time;
        s->tb_credits = s->tb_size / 2;

        /* Traffic Classes (TCs) */
        s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                if (params->qsize[i])
                        s->tc_credits_per_period[i]
                                = rte_sched_time_ms_to_bytes(params->tc_period,
                                        params->tc_rate[i]);
        }
        s->tc_time = port->time + s->tc_period;
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (params->qsize[i])
                        s->tc_credits[i] = s->tc_credits_per_period[i];

        /* compile time checks */
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
        RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
                (RTE_SCHED_PORT_N_GRINDERS - 1));

        /* User parameters */
        s->n_pipes_per_subport_enabled = params->n_pipes_per_subport_enabled;
        memcpy(s->qsize, params->qsize, sizeof(params->qsize));
        s->n_pipe_profiles = params->n_pipe_profiles;
        s->n_max_pipe_profiles = params->n_max_pipe_profiles;

#ifdef RTE_SCHED_RED
        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
                uint32_t j;

                for (j = 0; j < RTE_COLORS; j++) {
                        /* if min/max are both zero, then RED is disabled */
                        if ((params->red_params[i][j].min_th |
                             params->red_params[i][j].max_th) == 0) {
                                continue;
                        }

                        if (rte_red_config_init(&s->red_config[i][j],
                                params->red_params[i][j].wq_log2,
                                params->red_params[i][j].min_th,
                                params->red_params[i][j].max_th,
                                params->red_params[i][j].maxp_inv) != 0) {
                                rte_sched_free_memory(port, n_subports);

                                RTE_LOG(NOTICE, SCHED,
                                "%s: RED configuration init fails\n", __func__);
                                return -EINVAL;
                        }
                }
        }
#endif

        /* Scheduling loop detection */
        s->pipe_loop = RTE_SCHED_PIPE_INVALID;
        s->pipe_exhaustion = 0;

        /* Grinders */
        s->busy_grinders = 0;

        /* Queue base calculation */
        rte_sched_subport_config_qsize(s);

        /* Large data structures */
        s->pipe = (struct rte_sched_pipe *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
        s->queue = (struct rte_sched_queue *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
        s->queue_extra = (struct rte_sched_queue_extra *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
        s->pipe_profiles = (struct rte_sched_pipe_profile *)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
        s->bmp_array = s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
        s->queue_array = (struct rte_mbuf **)
                (s->memory + rte_sched_subport_get_array_base(params,
                e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));

        /* Pipe profile table */
        rte_sched_subport_config_pipe_profile_table(s, params, port->rate);

        /* Bitmap */
        n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
        bmp_mem_size = rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
        s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
                                bmp_mem_size);
        if (s->bmp == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Subport bitmap init error\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
                s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;

#ifdef RTE_SCHED_SUBPORT_TC_OV
        /* TC oversubscription */
        s->tc_ov_wm_min = port->mtu;
        s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period,
                                                     s->pipe_tc_be_rate_max);
        s->tc_ov_wm = s->tc_ov_wm_max;
        s->tc_ov_period_id = 0;
        s->tc_ov = 0;
        s->tc_ov_n = 0;
        s->tc_ov_rate = 0;
#endif

        rte_sched_port_log_subport_config(port, subport_id);

        return 0;
}

int
rte_sched_pipe_config(struct rte_sched_port *port,
        uint32_t subport_id,
        uint32_t pipe_id,
        int32_t pipe_profile)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe *p;
        struct rte_sched_pipe_profile *params;
        uint32_t n_subports = subport_id + 1;
        uint32_t deactivate, profile, i;

        /* Check user parameters */
        profile = (uint32_t) pipe_profile;
        deactivate = (pipe_profile < 0);

        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter subport id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        s = port->subports[subport_id];
        if (pipe_id >= s->n_pipes_per_subport_enabled) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe id\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        if (!deactivate && profile >= s->n_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter pipe profile\n", __func__);

                rte_sched_free_memory(port, n_subports);
                return -EINVAL;
        }

        /* Handle the case when pipe already has a valid configuration */
        p = s->pipe + pipe_id;
        if (p->tb_time) {
                params = s->pipe_profiles + p->profile;

                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                /* Unplug pipe from its subport */
                s->tc_ov_n -= params->tc_ov_weight;
                s->tc_ov_rate -= pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }

                /* Reset the pipe */
                memset(p, 0, sizeof(struct rte_sched_pipe));
        }

        if (deactivate)
                return 0;

        /* Apply the new pipe configuration */
        p->profile = profile;
        params = s->pipe_profiles + p->profile;

        /* Token Bucket (TB) */
        p->tb_time = port->time;
        p->tb_credits = params->tb_size / 2;

        /* Traffic Classes (TCs) */
        p->tc_time = port->time + params->tc_period;

        for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
                if (s->qsize[i])
                        p->tc_credits[i] = params->tc_credits_per_period[i];

        {
                /* Subport best effort tc oversubscription */
                double subport_tc_be_rate =
                        (double) s->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) s->tc_period;
                double pipe_tc_be_rate =
                        (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
                        / (double) params->tc_period;
                uint32_t tc_be_ov = s->tc_ov;

                s->tc_ov_n += params->tc_ov_weight;
                s->tc_ov_rate += pipe_tc_be_rate;
                s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;

                if (s->tc_ov != tc_be_ov) {
                        RTE_LOG(DEBUG, SCHED,
                                "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
                                subport_id, subport_tc_be_rate, s->tc_ov_rate);
                }
                p->tc_ov_period_id = s->tc_ov_period_id;
                p->tc_ov_credits = s->tc_ov_wm;
        }

        return 0;
}

int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_pipe_params *params,
        uint32_t *pipe_profile_id)
{
        struct rte_sched_subport *s;
        struct rte_sched_pipe_profile *pp;
        uint32_t i;
        int status;

        /* Port */
        if (port == NULL) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for parameter port\n", __func__);
                return -EINVAL;
        }

        /* Subport id must not exceed the max limit */
        if (subport_id >= port->n_subports_per_port) {
                RTE_LOG(ERR, SCHED,
                        "%s: Incorrect value for subport id\n", __func__);
                return -EINVAL;
        }

        s = port->subports[subport_id];

        /* Pipe profile table must not be full */
        if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
                RTE_LOG(ERR, SCHED,
                        "%s: Number of pipe profiles exceeds the max limit\n", __func__);
                return -EINVAL;
        }

        /* Pipe params */
        status = pipe_profile_check(params, port->rate, &s->qsize[0]);
        if (status != 0) {
                RTE_LOG(ERR, SCHED,
                        "%s: Pipe profile check failed (%d)\n", __func__, status);
                return -EINVAL;
        }

        pp = &s->pipe_profiles[s->n_pipe_profiles];
        rte_sched_pipe_profile_convert(s, params, pp, port->rate);

        /* Pipe profile must not already exist */
        for (i = 0; i < s->n_pipe_profiles; i++)
                if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
                        RTE_LOG(ERR, SCHED,
                                "%s: Pipe profile exists\n", __func__);
                        return -EINVAL;
                }

        /* Pipe profile commit */
        *pipe_profile_id = s->n_pipe_profiles;
        s->n_pipe_profiles++;

        if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
                s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];

        rte_sched_port_log_pipe_profile(s, *pipe_profile_id);

        return 0;
}

static inline uint32_t
rte_sched_port_qindex(struct rte_sched_port *port,
        uint32_t subport,
        uint32_t pipe,
        uint32_t traffic_class,
        uint32_t queue)
{
        return ((subport & (port->n_subports_per_port - 1)) <<
                (port->n_pipes_per_subport_log2 + 4)) |
                ((pipe &
                (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
                ((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
                (RTE_SCHED_QUEUES_PER_PIPE - 1));
}
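
/*
 * Bit layout of the queue index built above (sketch, assuming
 * n_pipes_per_subport_log2 == 12 and 16 queues per pipe):
 *
 *     | subport id | pipe id (12 bits) | queue within pipe (4 bits) |
 *
 * e.g. subport 1, pipe 3, best-effort TC (pipe queue 12), queue 2 maps to
 * (1 << 16) | (3 << 4) | (12 + 2) = 0x1003E.
 */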

void
rte_sched_port_pkt_write(struct rte_sched_port *port,
                         struct rte_mbuf *pkt,
                         uint32_t subport, uint32_t pipe,
                         uint32_t traffic_class,
                         uint32_t queue, enum rte_color color)
{
        uint32_t queue_id =
                rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);

        rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
}
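
/*
 * Typical use in the classification stage (sketch; deriving subport, pipe,
 * traffic_class and queue from packet fields is application specific):
 *
 *     rte_sched_port_pkt_write(port, pkt, subport, pipe, tc, queue,
 *                              RTE_COLOR_GREEN);
 *     nb_enq = rte_sched_port_enqueue(port, pkts, nb_pkts);
 *
 * The hints travel in the mbuf sched field and are read back on the
 * enqueue path via rte_sched_port_pkt_read_tree_path().
 */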
1410
1411 void
1412 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1413                                   const struct rte_mbuf *pkt,
1414                                   uint32_t *subport, uint32_t *pipe,
1415                                   uint32_t *traffic_class, uint32_t *queue)
1416 {
1417         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1418
1419         *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1420         *pipe = (queue_id >> 4) &
1421                 (port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1422         *traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1423         *queue = rte_sched_port_tc_queue(port, queue_id);
1424 }
1425
1426 enum rte_color
1427 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1428 {
1429         return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1430 }
1431
1432 int
1433 rte_sched_subport_read_stats(struct rte_sched_port *port,
1434                              uint32_t subport_id,
1435                              struct rte_sched_subport_stats *stats,
1436                              uint32_t *tc_ov)
1437 {
1438         struct rte_sched_subport *s;
1439
1440         /* Check user parameters */
1441         if (port == NULL) {
1442                 RTE_LOG(ERR, SCHED,
1443                         "%s: Incorrect value for parameter port\n", __func__);
1444                 return -EINVAL;
1445         }
1446
1447         if (subport_id >= port->n_subports_per_port) {
1448                 RTE_LOG(ERR, SCHED,
1449                         "%s: Incorrect value for subport id\n", __func__);
1450                 return -EINVAL;
1451         }
1452
1453         if (stats == NULL) {
1454                 RTE_LOG(ERR, SCHED,
1455                         "%s: Incorrect value for parameter stats\n", __func__);
1456                 return -EINVAL;
1457         }
1458
1459         if (tc_ov == NULL) {
1460                 RTE_LOG(ERR, SCHED,
1461                         "%s: Incorrect value for tc_ov\n", __func__);
1462                 return -EINVAL;
1463         }
1464
1465         s = port->subports[subport_id];
1466
1467         /* Copy subport stats and clear */
1468         memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1469         memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1470
1471         /* Subport TC oversubscription status */
1472         *tc_ov = s->tc_ov;
1473
1474         return 0;
1475 }
1476
1477 int
1478 rte_sched_queue_read_stats(struct rte_sched_port *port,
1479         uint32_t queue_id,
1480         struct rte_sched_queue_stats *stats,
1481         uint16_t *qlen)
1482 {
1483         struct rte_sched_queue *q;
1484         struct rte_sched_queue_extra *qe;
1485
1486         /* Check user parameters */
1487         if (port == NULL) {
1488                 RTE_LOG(ERR, SCHED,
1489                         "%s: Incorrect value for parameter port\n", __func__);
1490                 return -EINVAL;
1491         }
1492
1493         if (queue_id >= rte_sched_port_queues_per_port(port)) {
1494                 RTE_LOG(ERR, SCHED,
1495                         "%s: Incorrect value for queue id\n", __func__);
1496                 return -EINVAL;
1497         }
1498
1499         if (stats == NULL) {
1500                 RTE_LOG(ERR, SCHED,
1501                         "%s: Incorrect value for parameter stats\n", __func__);
1502                 return -EINVAL;
1503         }
1504
1505         if (qlen == NULL) {
1506                 RTE_LOG(ERR, SCHED,
1507                         "%s: Incorrect value for parameter qlen\n", __func__);
1508                 return -EINVAL;
1509         }
1510         q = port->queue + queue_id;
1511         qe = port->queue_extra + queue_id;
1512
1513         /* Copy queue stats and clear */
1514         memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1515         memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1516
1517         /* Queue length */
1518         *qlen = q->qw - q->qr;
1519
1520         return 0;
1521 }
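
/*
 * Note: qw and qr are free-running 16-bit write/read counters, so the
 * occupancy computed above is (qw - qr) mod 2^16, and slot indexing
 * uses counter & (qsize - 1), which requires qsize to be a power of two.
 */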
1522
1523 #ifdef RTE_SCHED_DEBUG
1524
1525 static inline int
1526 rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
1527 {
1528         struct rte_sched_queue *queue = port->queue + qindex;
1529
1530         return queue->qr == queue->qw;
1531 }
1532
1533 #endif /* RTE_SCHED_DEBUG */
1534
1535 #ifdef RTE_SCHED_COLLECT_STATS
1536
1537 static inline void
1538 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1539         struct rte_sched_subport *subport,
1540         uint32_t qindex,
1541         struct rte_mbuf *pkt)
1542 {
1543         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1544         uint32_t pkt_len = pkt->pkt_len;
1545
1546         subport->stats.n_pkts_tc[tc_index] += 1;
1547         subport->stats.n_bytes_tc[tc_index] += pkt_len;
1548 }
1549
1550 #ifdef RTE_SCHED_RED
1551 static inline void
1552 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1553         struct rte_sched_subport *subport,
1554         uint32_t qindex,
1555         struct rte_mbuf *pkt,
1556         uint32_t red)
1557 #else
1558 static inline void
1559 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1560         struct rte_sched_subport *subport,
1561         uint32_t qindex,
1562         struct rte_mbuf *pkt,
1563         __rte_unused uint32_t red)
1564 #endif
1565 {
1566         uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1567         uint32_t pkt_len = pkt->pkt_len;
1568
1569         subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1570         subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1571 #ifdef RTE_SCHED_RED
1572         subport->stats.n_pkts_red_dropped[tc_index] += red;
1573 #endif
1574 }
1575
1576 static inline void
1577 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1578         uint32_t qindex,
1579         struct rte_mbuf *pkt)
1580 {
1581         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1582         uint32_t pkt_len = pkt->pkt_len;
1583
1584         qe->stats.n_pkts += 1;
1585         qe->stats.n_bytes += pkt_len;
1586 }
1587
1588 #ifdef RTE_SCHED_RED
1589 static inline void
1590 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1591         uint32_t qindex,
1592         struct rte_mbuf *pkt,
1593         uint32_t red)
1594 #else
1595 static inline void
1596 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1597         uint32_t qindex,
1598         struct rte_mbuf *pkt,
1599         __rte_unused uint32_t red)
1600 #endif
1601 {
1602         struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1603         uint32_t pkt_len = pkt->pkt_len;
1604
1605         qe->stats.n_pkts_dropped += 1;
1606         qe->stats.n_bytes_dropped += pkt_len;
1607 #ifdef RTE_SCHED_RED
1608         qe->stats.n_pkts_red_dropped += red;
1609 #endif
1610 }
1611
1612 #endif /* RTE_SCHED_COLLECT_STATS */
1613
1614 #ifdef RTE_SCHED_RED
1615
1616 static inline int
1617 rte_sched_port_red_drop(struct rte_sched_port *port,
1618         struct rte_sched_subport *subport,
1619         struct rte_mbuf *pkt,
1620         uint32_t qindex,
1621         uint16_t qlen)
1622 {
1623         struct rte_sched_queue_extra *qe;
1624         struct rte_red_config *red_cfg;
1625         struct rte_red *red;
1626         uint32_t tc_index;
1627         enum rte_color color;
1628
1629         tc_index = rte_sched_port_pipe_tc(port, qindex);
1630         color = rte_sched_port_pkt_read_color(pkt);
1631         red_cfg = &subport->red_config[tc_index][color];
1632
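        /* min_th == max_th == 0 means RED is not configured for this TC/color */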
1633         if ((red_cfg->min_th | red_cfg->max_th) == 0)
1634                 return 0;
1635
1636         qe = subport->queue_extra + qindex;
1637         red = &qe->red;
1638
1639         return rte_red_enqueue(red_cfg, red, qlen, port->time);
1640 }
1641
1642 static inline void
1643 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
1644 {
1645         struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
1646         struct rte_red *red = &qe->red;
1647
1648         rte_red_mark_queue_empty(red, port->time);
1649 }
1650
1651 #else
1652
1653 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
1654         struct rte_sched_subport *subport __rte_unused,
1655         struct rte_mbuf *pkt __rte_unused,
1656         uint32_t qindex __rte_unused,
1657         uint16_t qlen __rte_unused)
1658 {
1659         return 0;
1660 }
1661
1662 #define rte_sched_port_set_queue_empty_timestamp(port, qindex)
1663
1664 #endif /* RTE_SCHED_RED */
1665
1666 #ifdef RTE_SCHED_DEBUG
1667
1668 static inline void
1669 debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos,
1670                        uint64_t bmp_slab)
1671 {
1672         uint64_t mask;
1673         uint32_t i, panic;
1674
1675         if (bmp_slab == 0)
1676                 rte_panic("Empty slab at position %u\n", bmp_pos);
1677
1678         panic = 0;
1679         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1680                 if (mask & bmp_slab) {
1681                         if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) {
1682                                 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1683                                 panic = 1;
1684                         }
1685                 }
1686         }
1687
1688         if (panic)
1689                 rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1690                         bmp_slab, bmp_pos);
1691 }
1692
1693 #endif /* RTE_SCHED_DEBUG */
1694
1695 static inline struct rte_sched_subport *
1696 rte_sched_port_subport(struct rte_sched_port *port,
1697         struct rte_mbuf *pkt)
1698 {
1699         uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1700         uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1701
1702         return port->subports[subport_id];
1703 }
1704
1705 static inline uint32_t
1706 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1707         struct rte_mbuf *pkt, uint32_t subport_qmask)
1708 {
1709         struct rte_sched_queue *q;
1710 #ifdef RTE_SCHED_COLLECT_STATS
1711         struct rte_sched_queue_extra *qe;
1712 #endif
1713         uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1714         uint32_t subport_queue_id = subport_qmask & qindex;
1715
1716         q = subport->queue + subport_queue_id;
1717         rte_prefetch0(q);
1718 #ifdef RTE_SCHED_COLLECT_STATS
1719         qe = subport->queue_extra + subport_queue_id;
1720         rte_prefetch0(qe);
1721 #endif
1722
1723         return subport_queue_id;
1724 }
1725
1726 static inline void
1727 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
1728         struct rte_sched_subport *subport,
1729         uint32_t qindex,
1730         struct rte_mbuf **qbase)
1731 {
1732         struct rte_sched_queue *q;
1733         struct rte_mbuf **q_qw;
1734         uint16_t qsize;
1735
1736         q = subport->queue + qindex;
1737         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1738         q_qw = qbase + (q->qw & (qsize - 1));
1739
1740         rte_prefetch0(q_qw);
1741         rte_bitmap_prefetch0(subport->bmp, qindex);
1742 }
1743
1744 static inline int
1745 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
1746         struct rte_sched_subport *subport,
1747         uint32_t qindex,
1748         struct rte_mbuf **qbase,
1749         struct rte_mbuf *pkt)
1750 {
1751         struct rte_sched_queue *q;
1752         uint16_t qsize;
1753         uint16_t qlen;
1754
1755         q = subport->queue + qindex;
1756         qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1757         qlen = q->qw - q->qr;
1758
1759         /* Drop on RED decision or full queue; (qlen < qsize) flags a RED drop */
1760         if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
1761                      (qlen >= qsize))) {
1762                 rte_pktmbuf_free(pkt);
1763 #ifdef RTE_SCHED_COLLECT_STATS
1764                 rte_sched_port_update_subport_stats_on_drop(port, subport,
1765                         qindex, pkt, qlen < qsize);
1766                 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
1767                         qlen < qsize);
1768 #endif
1769                 return 0;
1770         }
1771
1772         /* Enqueue packet */
1773         qbase[q->qw & (qsize - 1)] = pkt;
1774         q->qw++;
1775
1776         /* Activate queue in the subport bitmap */
1777         rte_bitmap_set(subport->bmp, qindex);
1778
1779         /* Statistics */
1780 #ifdef RTE_SCHED_COLLECT_STATS
1781         rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
1782         rte_sched_port_update_queue_stats(subport, qindex, pkt);
1783 #endif
1784
1785         return 1;
1786 }
1787
1788
1789 /*
1790  * The enqueue function implements a 4-level pipeline with each stage
1791  * processing two different packets. The purpose of using a pipeline
1792  * is to hide the latency of prefetching the data structures. The
1793  * naming convention is presented in the diagram below:
1794  *
1795  *   p00  _______   p10  _______   p20  _______   p30  _______
1796  * ----->|       |----->|       |----->|       |----->|       |----->
1797  *       |   0   |      |   1   |      |   2   |      |   3   |
1798  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
1799  *   p01            p11            p21            p31
1800  *
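 * The four stages, in the order the code below executes them:
 * - Stage 0: read two new packets in and prefetch their mbufs
 * - Stage 1: prefetch the subport and queue structures (queue pointers)
 * - Stage 2: prefetch the queue write location
 * - Stage 3: write the packet to its queue and activate the queue
 *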
1801  */
1802 int
1803 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
1804                        uint32_t n_pkts)
1805 {
1806         struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
1807                 *pkt30, *pkt31, *pkt_last;
1808         struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
1809                 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
1810         struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
1811                 *subport20, *subport21, *subport30, *subport31, *subport_last;
1812         uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
1813         uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
1814         uint32_t subport_qmask;
1815         uint32_t result, i;
1816
1817         result = 0;
1818         subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
1819
1820         /*
1821          * Fewer than 6 input packets available, which is not enough to
1822          * feed the 4-stage pipeline
1823          */
1824         if (unlikely(n_pkts < 6)) {
1825                 struct rte_sched_subport *subports[5];
1826                 struct rte_mbuf **q_base[5];
1827                 uint32_t q[5];
1828
1829                 /* Prefetch the mbuf structure of each packet */
1830                 for (i = 0; i < n_pkts; i++)
1831                         rte_prefetch0(pkts[i]);
1832
1833                 /* Prefetch the subport structure for each packet */
1834                 for (i = 0; i < n_pkts; i++)
1835                         subports[i] = rte_sched_port_subport(port, pkts[i]);
1836
1837                 /* Prefetch the queue structure for each queue */
1838                 for (i = 0; i < n_pkts; i++)
1839                         q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
1840                                         pkts[i], subport_qmask);
1841
1842                 /* Prefetch the write pointer location of each queue */
1843                 for (i = 0; i < n_pkts; i++) {
1844                         q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
1845                         rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
1846                                 q[i], q_base[i]);
1847                 }
1848
1849                 /* Write each packet to its queue */
1850                 for (i = 0; i < n_pkts; i++)
1851                         result += rte_sched_port_enqueue_qwa(port, subports[i],
1852                                                 q[i], q_base[i], pkts[i]);
1853
1854                 return result;
1855         }
1856
1857         /* Feed the first 3 stages of the pipeline (6 packets needed) */
1858         pkt20 = pkts[0];
1859         pkt21 = pkts[1];
1860         rte_prefetch0(pkt20);
1861         rte_prefetch0(pkt21);
1862
1863         pkt10 = pkts[2];
1864         pkt11 = pkts[3];
1865         rte_prefetch0(pkt10);
1866         rte_prefetch0(pkt11);
1867
1868         subport20 = rte_sched_port_subport(port, pkt20);
1869         subport21 = rte_sched_port_subport(port, pkt21);
1870         q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
1871                         pkt20, subport_qmask);
1872         q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
1873                         pkt21, subport_qmask);
1874
1875         pkt00 = pkts[4];
1876         pkt01 = pkts[5];
1877         rte_prefetch0(pkt00);
1878         rte_prefetch0(pkt01);
1879
1880         subport10 = rte_sched_port_subport(port, pkt10);
1881         subport11 = rte_sched_port_subport(port, pkt11);
1882         q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1883                         pkt10, subport_qmask);
1884         q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1885                         pkt11, subport_qmask);
1886
1887         q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1888         q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1889         rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1890         rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1891
1892         /* Run the pipeline */
1893         for (i = 6; i < (n_pkts & (~1)); i += 2) {
1894                 /* Propagate stage inputs */
1895                 pkt30 = pkt20;
1896                 pkt31 = pkt21;
1897                 pkt20 = pkt10;
1898                 pkt21 = pkt11;
1899                 pkt10 = pkt00;
1900                 pkt11 = pkt01;
1901                 q30 = q20;
1902                 q31 = q21;
1903                 q20 = q10;
1904                 q21 = q11;
1905                 subport30 = subport20;
1906                 subport31 = subport21;
1907                 subport20 = subport10;
1908                 subport21 = subport11;
1909                 q30_base = q20_base;
1910                 q31_base = q21_base;
1911
1912                 /* Stage 0: Get packets in */
1913                 pkt00 = pkts[i];
1914                 pkt01 = pkts[i + 1];
1915                 rte_prefetch0(pkt00);
1916                 rte_prefetch0(pkt01);
1917
1918                 /* Stage 1: Prefetch subport and queue structure storing queue pointers */
1919                 subport10 = rte_sched_port_subport(port, pkt10);
1920                 subport11 = rte_sched_port_subport(port, pkt11);
1921                 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
1922                                 pkt10, subport_qmask);
1923                 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
1924                                 pkt11, subport_qmask);
1925
1926                 /* Stage 2: Prefetch queue write location */
1927                 q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
1928                 q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
1929                 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
1930                 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
1931
1932                 /* Stage 3: Write packet to queue and activate queue */
1933                 r30 = rte_sched_port_enqueue_qwa(port, subport30,
1934                                 q30, q30_base, pkt30);
1935                 r31 = rte_sched_port_enqueue_qwa(port, subport31,
1936                                 q31, q31_base, pkt31);
1937                 result += r30 + r31;
1938         }
1939
1940         /*
1941          * Drain the pipeline (exactly 6 packets).
1942          * Handle the last packet in the case
1943          * of an odd number of input packets.
1944          */
1945         pkt_last = pkts[n_pkts - 1];
1946         rte_prefetch0(pkt_last);
1947
1948         subport00 = rte_sched_port_subport(port, pkt00);
1949         subport01 = rte_sched_port_subport(port, pkt01);
1950         q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
1951                         pkt00, subport_qmask);
1952         q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
1953                         pkt01, subport_qmask);
1954
1955         q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
1956         q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
1957         rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
1958         rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
1959
1960         r20 = rte_sched_port_enqueue_qwa(port, subport20,
1961                         q20, q20_base, pkt20);
1962         r21 = rte_sched_port_enqueue_qwa(port, subport21,
1963                         q21, q21_base, pkt21);
1964         result += r20 + r21;
1965
1966         subport_last = rte_sched_port_subport(port, pkt_last);
1967         q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
1968                                 pkt_last, subport_qmask);
1969
1970         q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
1971         q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
1972         rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
1973         rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
1974
1975         r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
1976                         q10_base, pkt10);
1977         r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
1978                         q11_base, pkt11);
1979         result += r10 + r11;
1980
1981         q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
1982         rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
1983                 q_last, q_last_base);
1984
1985         r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
1986                         q00_base, pkt00);
1987         r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
1988                         q01_base, pkt01);
1989         result += r00 + r01;
1990
1991         if (n_pkts & 1) {
1992                 r_last = rte_sched_port_enqueue_qwa(port, subport_last,
1993                                         q_last, q_last_base, pkt_last);
1994                 result += r_last;
1995         }
1996
1997         return result;
1998 }
1999
2000 #ifndef RTE_SCHED_SUBPORT_TC_OV
2001
2002 static inline void
2003 grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
2004 {
2005         struct rte_sched_grinder *grinder = port->grinder + pos;
2006         struct rte_sched_subport *subport = grinder->subport;
2007         struct rte_sched_pipe *pipe = grinder->pipe;
2008         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2009         uint64_t n_periods;
2010         uint32_t i;
2011
2012         /* Subport TB */
2013         n_periods = (port->time - subport->tb_time) / subport->tb_period;
2014         subport->tb_credits += n_periods * subport->tb_credits_per_period;
2015         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
2016         subport->tb_time += n_periods * subport->tb_period;
2017
2018         /* Pipe TB */
2019         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2020         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2021         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
2022         pipe->tb_time += n_periods * params->tb_period;
2023
2024         /* Subport TCs */
2025         if (unlikely(port->time >= subport->tc_time)) {
2026                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2027                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
2028
2029                 subport->tc_time = port->time + subport->tc_period;
2030         }
2031
2032         /* Pipe TCs */
2033         if (unlikely(port->time >= pipe->tc_time)) {
2034                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2035                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2036
2037                 pipe->tc_time = port->time + params->tc_period;
2038         }
2039 }
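
/*
 * Token bucket refill above is lazy: credits for all full periods
 * elapsed since the last update are added in one step and capped at the
 * bucket size. Illustrative numbers: with tb_period = 100 and
 * tb_credits_per_period = 10, an elapsed time of 350 adds 3 * 10 = 30
 * credits and advances tb_time by 300, carrying the remainder of 50
 * into the next update.
 */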
2040
2041 #else
2042
2043 static inline uint32_t
2044 grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos)
2045 {
2046         struct rte_sched_grinder *grinder = port->grinder + pos;
2047         struct rte_sched_subport *subport = grinder->subport;
2048         uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2049         uint32_t tc_consumption = 0, tc_ov_consumption_max;
2050         uint32_t tc_ov_wm = subport->tc_ov_wm;
2051         uint32_t i;
2052
2053         if (subport->tc_ov == 0)
2054                 return subport->tc_ov_wm_max;
2055
2056         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2057                 tc_ov_consumption[i] =
2058                         subport->tc_credits_per_period[i] - subport->tc_credits[i];
2059                 tc_consumption += tc_ov_consumption[i];
2060         }
2061
2062         tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2063                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2064                 subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2065
2066         tc_ov_consumption_max =
2067                 subport->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2068                         tc_consumption;
2069
2070         if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2071                 (tc_ov_consumption_max - port->mtu)) {
2072                 tc_ov_wm -= tc_ov_wm >> 7;
2073                 if (tc_ov_wm < subport->tc_ov_wm_min)
2074                         tc_ov_wm = subport->tc_ov_wm_min;
2075
2076                 return tc_ov_wm;
2077         }
2078
2079         tc_ov_wm += (tc_ov_wm >> 7) + 1;
2080         if (tc_ov_wm > subport->tc_ov_wm_max)
2081                 tc_ov_wm = subport->tc_ov_wm_max;
2082
2083         return tc_ov_wm;
2084 }
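
/*
 * The watermark adapts by roughly 1/128 (>> 7) per subport TC period:
 * decreased (down to tc_ov_wm_min) when best-effort consumption comes
 * within one MTU of the credits left unused by the higher-priority
 * traffic classes, increased (up to tc_ov_wm_max) otherwise.
 */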
2085
2086 static inline void
2087 grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
2088 {
2089         struct rte_sched_grinder *grinder = port->grinder + pos;
2090         struct rte_sched_subport *subport = grinder->subport;
2091         struct rte_sched_pipe *pipe = grinder->pipe;
2092         struct rte_sched_pipe_profile *params = grinder->pipe_params;
2093         uint64_t n_periods;
2094         uint32_t i;
2095
2096         /* Subport TB */
2097         n_periods = (port->time - subport->tb_time) / subport->tb_period;
2098         subport->tb_credits += n_periods * subport->tb_credits_per_period;
2099         subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
2100         subport->tb_time += n_periods * subport->tb_period;
2101
2102         /* Pipe TB */
2103         n_periods = (port->time - pipe->tb_time) / params->tb_period;
2104         pipe->tb_credits += n_periods * params->tb_credits_per_period;
2105         pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
2106         pipe->tb_time += n_periods * params->tb_period;
2107
2108         /* Subport TCs */
2109         if (unlikely(port->time >= subport->tc_time)) {
2110                 subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos);
2111
2112                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2113                         subport->tc_credits[i] = subport->tc_credits_per_period[i];
2114
2115                 subport->tc_time = port->time + subport->tc_period;
2116                 subport->tc_ov_period_id++;
2117         }
2118
2119         /* Pipe TCs */
2120         if (unlikely(port->time >= pipe->tc_time)) {
2121                 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2122                         pipe->tc_credits[i] = params->tc_credits_per_period[i];
2123                 pipe->tc_time = port->time + params->tc_period;
2124         }
2125
2126         /* Pipe TCs - Oversubscription */
2127         if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2128                 pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2129
2130                 pipe->tc_ov_period_id = subport->tc_ov_period_id;
2131         }
2132 }
2133
2134 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2135
2136
2137 #ifndef RTE_SCHED_SUBPORT_TC_OV
2138
2139 static inline int
2140 grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
2141 {
2142         struct rte_sched_grinder *grinder = port->grinder + pos;
2143         struct rte_sched_subport *subport = grinder->subport;
2144         struct rte_sched_pipe *pipe = grinder->pipe;
2145         struct rte_mbuf *pkt = grinder->pkt;
2146         uint32_t tc_index = grinder->tc_index;
2147         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2148         uint32_t subport_tb_credits = subport->tb_credits;
2149         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2150         uint32_t pipe_tb_credits = pipe->tb_credits;
2151         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2152         int enough_credits;
2153
2154         /* Check pipe and subport credits */
2155         enough_credits = (pkt_len <= subport_tb_credits) &&
2156                 (pkt_len <= subport_tc_credits) &&
2157                 (pkt_len <= pipe_tb_credits) &&
2158                 (pkt_len <= pipe_tc_credits);
2159
2160         if (!enough_credits)
2161                 return 0;
2162
2163         /* Update pipe and subport credits */
2164         subport->tb_credits -= pkt_len;
2165         subport->tc_credits[tc_index] -= pkt_len;
2166         pipe->tb_credits -= pkt_len;
2167         pipe->tc_credits[tc_index] -= pkt_len;
2168
2169         return 1;
2170 }
2171
2172 #else
2173
2174 static inline int
2175 grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
2176 {
2177         struct rte_sched_grinder *grinder = port->grinder + pos;
2178         struct rte_sched_subport *subport = grinder->subport;
2179         struct rte_sched_pipe *pipe = grinder->pipe;
2180         struct rte_mbuf *pkt = grinder->pkt;
2181         uint32_t tc_index = grinder->tc_index;
2182         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2183         uint32_t subport_tb_credits = subport->tb_credits;
2184         uint32_t subport_tc_credits = subport->tc_credits[tc_index];
2185         uint32_t pipe_tb_credits = pipe->tb_credits;
2186         uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
2187         uint32_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2188         uint32_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2189         uint32_t pipe_tc_ov_credits, i;
2190         int enough_credits;
2191
2192         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2193                 pipe_tc_ov_mask1[i] = UINT32_MAX;
2194
2195         pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2196         pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = UINT32_MAX;
2197         pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2198
2199         /* Check pipe and subport credits */
2200         enough_credits = (pkt_len <= subport_tb_credits) &&
2201                 (pkt_len <= subport_tc_credits) &&
2202                 (pkt_len <= pipe_tb_credits) &&
2203                 (pkt_len <= pipe_tc_credits) &&
2204                 (pkt_len <= pipe_tc_ov_credits);
2205
2206         if (!enough_credits)
2207                 return 0;
2208
2209         /* Update pipe and subport credits */
2210         subport->tb_credits -= pkt_len;
2211         subport->tc_credits[tc_index] -= pkt_len;
2212         pipe->tb_credits -= pkt_len;
2213         pipe->tc_credits[tc_index] -= pkt_len;
2214         pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2215
2216         return 1;
2217 }
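
/*
 * The two mask arrays above make the oversubscription handling
 * branchless: for the high-priority traffic classes pipe_tc_ov_credits
 * reads as UINT32_MAX (the check always passes) and the tc_ov_credits
 * decrement is ANDed with 0 (a no-op); only the best-effort traffic
 * class sees the real tc_ov_credits value and pays for the packet.
 */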
2218
2219 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2220
2221
2222 static inline int
2223 grinder_schedule(struct rte_sched_port *port, uint32_t pos)
2224 {
2225         struct rte_sched_grinder *grinder = port->grinder + pos;
2226         struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2227         struct rte_mbuf *pkt = grinder->pkt;
2228         uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2229         uint32_t be_tc_active;
2230
2231         if (!grinder_credits_check(port, pos))
2232                 return 0;
2233
2234         /* Advance port time */
2235         port->time += pkt_len;
2236
2237         /* Send packet */
2238         port->pkts_out[port->n_pkts_out++] = pkt;
2239         queue->qr++;
2240
2241         be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2242         grinder->wrr_tokens[grinder->qpos] +=
2243                 (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2244
2245         if (queue->qr == queue->qw) {
2246                 uint32_t qindex = grinder->qindex[grinder->qpos];
2247
2248                 rte_bitmap_clear(port->bmp, qindex);
2249                 grinder->qmask &= ~(1 << grinder->qpos);
2250                 if (be_tc_active)
2251                         grinder->wrr_mask[grinder->qpos] = 0;
2252                 rte_sched_port_set_queue_empty_timestamp(port, qindex);
2253         }
2254
2255         /* Reset pipe loop detection */
2256         port->pipe_loop = RTE_SCHED_PIPE_INVALID;
2257         grinder->productive = 1;
2258
2259         return 1;
2260 }
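
/*
 * be_tc_active is an all-ones/all-zeroes mask, so the WRR token update
 * above is a no-op for the high-priority (single-queue) traffic classes
 * and only takes effect for the best-effort traffic class.
 */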
2261
2262 #ifdef SCHED_VECTOR_SSE4
2263
2264 static inline int
2265 grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
2266 {
2267         __m128i index = _mm_set1_epi32(base_pipe);
2268         __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos);
2269         __m128i res = _mm_cmpeq_epi32(pipes, index);
2270
2271         pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4));
2272         pipes = _mm_cmpeq_epi32(pipes, index);
2273         res = _mm_or_si128(res, pipes);
2274
2275         if (_mm_testz_si128(res, res))
2276                 return 0;
2277
2278         return 1;
2279 }
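
/*
 * This variant (and the NEON one below) compares base_pipe against all
 * grinder base positions at once, using two 4-lane compares; this
 * assumes RTE_SCHED_PORT_N_GRINDERS == 8, matching the scalar loop in
 * the generic fallback.
 */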
2280
2281 #elif defined(SCHED_VECTOR_NEON)
2282
2283 static inline int
2284 grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
2285 {
2286         uint32x4_t index, pipes;
2287         uint32_t *pos = (uint32_t *)port->grinder_base_bmp_pos;
2288
2289         index = vmovq_n_u32(base_pipe);
2290         pipes = vld1q_u32(pos);
2291         if (!vminvq_u32(veorq_u32(pipes, index)))
2292                 return 1;
2293
2294         pipes = vld1q_u32(pos + 4);
2295         if (!vminvq_u32(veorq_u32(pipes, index)))
2296                 return 1;
2297
2298         return 0;
2299 }
2300
2301 #else
2302
2303 static inline int
2304 grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
2305 {
2306         uint32_t i;
2307
2308         for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2309                 if (port->grinder_base_bmp_pos[i] == base_pipe)
2310                         return 1;
2311         }
2312
2313         return 0;
2314 }
2315
2316 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2317
2318 static inline void
2319 grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2320 {
2321         struct rte_sched_grinder *grinder = port->grinder + pos;
2322         uint16_t w[4];
2323
2324         grinder->pcache_w = 0;
2325         grinder->pcache_r = 0;
2326
2327         w[0] = (uint16_t) bmp_slab;
2328         w[1] = (uint16_t) (bmp_slab >> 16);
2329         w[2] = (uint16_t) (bmp_slab >> 32);
2330         w[3] = (uint16_t) (bmp_slab >> 48);
2331
2332         grinder->pcache_qmask[grinder->pcache_w] = w[0];
2333         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2334         grinder->pcache_w += (w[0] != 0);
2335
2336         grinder->pcache_qmask[grinder->pcache_w] = w[1];
2337         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2338         grinder->pcache_w += (w[1] != 0);
2339
2340         grinder->pcache_qmask[grinder->pcache_w] = w[2];
2341         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2342         grinder->pcache_w += (w[2] != 0);
2343
2344         grinder->pcache_qmask[grinder->pcache_w] = w[3];
2345         grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2346         grinder->pcache_w += (w[3] != 0);
2347 }
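
/*
 * A 64-bit bitmap slab covers 64 consecutive queues, i.e. four pipes of
 * 16 queues each. The slab is split into four 16-bit per-pipe queue
 * masks, and only the non-empty ones are pushed into the pipe cache.
 */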
2348
2349 static inline void
2350 grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask)
2351 {
2352         struct rte_sched_grinder *grinder = port->grinder + pos;
2353         uint8_t b, i;
2354
2355         grinder->tccache_w = 0;
2356         grinder->tccache_r = 0;
2357
2358         for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2359                 b = (uint8_t) ((qmask >> i) & 0x1);
2360                 grinder->tccache_qmask[grinder->tccache_w] = b;
2361                 grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2362                 grinder->tccache_w += (b != 0);
2363         }
2364
2365         b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2366         grinder->tccache_qmask[grinder->tccache_w] = b;
2367         grinder->tccache_qindex[grinder->tccache_w] = qindex +
2368                 RTE_SCHED_TRAFFIC_CLASS_BE;
2369         grinder->tccache_w += (b != 0);
2370 }
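
/*
 * The TC cache gets one entry per active high-priority traffic class
 * (one bit each, as those classes have a single queue) plus one final
 * entry holding the 4-bit queue mask of the best-effort traffic class.
 */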
2371
2372 static inline int
2373 grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
2374 {
2375         struct rte_sched_grinder *grinder = port->grinder + pos;
2376         struct rte_mbuf **qbase;
2377         uint32_t qindex;
2378         uint16_t qsize;
2379
2380         if (grinder->tccache_r == grinder->tccache_w)
2381                 return 0;
2382
2383         qindex = grinder->tccache_qindex[grinder->tccache_r];
2384         qbase = rte_sched_port_qbase(port, qindex);
2385         qsize = rte_sched_port_qsize(port, qindex);
2386
2387         grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2388         grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2389         grinder->qsize = qsize;
2390
2391         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2392                 grinder->queue[0] = port->queue + qindex;
2393                 grinder->qbase[0] = qbase;
2394                 grinder->qindex[0] = qindex;
2395                 grinder->tccache_r++;
2396
2397                 return 1;
2398         }
2399
2400         grinder->queue[0] = port->queue + qindex;
2401         grinder->queue[1] = port->queue + qindex + 1;
2402         grinder->queue[2] = port->queue + qindex + 2;
2403         grinder->queue[3] = port->queue + qindex + 3;
2404
2405         grinder->qbase[0] = qbase;
2406         grinder->qbase[1] = qbase + qsize;
2407         grinder->qbase[2] = qbase + 2 * qsize;
2408         grinder->qbase[3] = qbase + 3 * qsize;
2409
2410         grinder->qindex[0] = qindex;
2411         grinder->qindex[1] = qindex + 1;
2412         grinder->qindex[2] = qindex + 2;
2413         grinder->qindex[3] = qindex + 3;
2414
2415         grinder->tccache_r++;
2416         return 1;
2417 }
2418
2419 static inline int
2420 grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
2421 {
2422         struct rte_sched_grinder *grinder = port->grinder + pos;
2423         uint32_t pipe_qindex;
2424         uint16_t pipe_qmask;
2425
2426         if (grinder->pcache_r < grinder->pcache_w) {
2427                 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2428                 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2429                 grinder->pcache_r++;
2430         } else {
2431                 uint64_t bmp_slab = 0;
2432                 uint32_t bmp_pos = 0;
2433
2434                 /* Get another non-empty pipe group */
2435                 if (unlikely(rte_bitmap_scan(port->bmp, &bmp_pos, &bmp_slab) <= 0))
2436                         return 0;
2437
2438 #ifdef RTE_SCHED_DEBUG
2439                 debug_check_queue_slab(port, bmp_pos, bmp_slab);
2440 #endif
2441
2442                 /* Return if pipe group already in one of the other grinders */
2443                 port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2444                 if (unlikely(grinder_pipe_exists(port, bmp_pos)))
2445                         return 0;
2446
2447                 port->grinder_base_bmp_pos[pos] = bmp_pos;
2448
2449                 /* Install new pipe group into grinder's pipe cache */
2450                 grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
2451
2452                 pipe_qmask = grinder->pcache_qmask[0];
2453                 pipe_qindex = grinder->pcache_qindex[0];
2454                 grinder->pcache_r = 1;
2455         }
2456
2457         /* Install new pipe in the grinder */
2458         grinder->pindex = pipe_qindex >> 4;
2459         grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
2460         grinder->pipe = port->pipe + grinder->pindex;
2461         grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2462         grinder->productive = 0;
2463
2464         grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
2465         grinder_next_tc(port, pos);
2466
2467         /* Check for pipe exhaustion */
2468         if (grinder->pindex == port->pipe_loop) {
2469                 port->pipe_exhaustion = 1;
2470                 port->pipe_loop = RTE_SCHED_PIPE_INVALID;
2471         }
2472
2473         return 1;
2474 }
2475
2476
2477 static inline void
2478 grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
2479 {
2480         struct rte_sched_grinder *grinder = port->grinder + pos;
2481         struct rte_sched_pipe *pipe = grinder->pipe;
2482         struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2483         uint32_t qmask = grinder->qmask;
2484
2485         grinder->wrr_tokens[0] =
2486                 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2487         grinder->wrr_tokens[1] =
2488                 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2489         grinder->wrr_tokens[2] =
2490                 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2491         grinder->wrr_tokens[3] =
2492                 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2493
2494         grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2495         grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2496         grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2497         grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2498
2499         grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2500         grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2501         grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2502         grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2503 }
2504
2505 static inline void
2506 grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
2507 {
2508         struct rte_sched_grinder *grinder = port->grinder + pos;
2509         struct rte_sched_pipe *pipe = grinder->pipe;
2510
2511         pipe->wrr_tokens[0] =
2512                         (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2513                                 RTE_SCHED_WRR_SHIFT;
2514         pipe->wrr_tokens[1] =
2515                         (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2516                                 RTE_SCHED_WRR_SHIFT;
2517         pipe->wrr_tokens[2] =
2518                         (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2519                                 RTE_SCHED_WRR_SHIFT;
2520         pipe->wrr_tokens[3] =
2521                         (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2522                                 RTE_SCHED_WRR_SHIFT;
2523 }
2524
2525 static inline void
2526 grinder_wrr(struct rte_sched_port *port, uint32_t pos)
2527 {
2528         struct rte_sched_grinder *grinder = port->grinder + pos;
2529         uint16_t wrr_tokens_min;
2530
2531         grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2532         grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2533         grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2534         grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2535
2536         grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2537         wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2538
2539         grinder->wrr_tokens[0] -= wrr_tokens_min;
2540         grinder->wrr_tokens[1] -= wrr_tokens_min;
2541         grinder->wrr_tokens[2] -= wrr_tokens_min;
2542         grinder->wrr_tokens[3] -= wrr_tokens_min;
2543 }
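
/*
 * Inactive queues get their tokens saturated to 0xFFFF through the
 * mask, so they never win the minimum search; the queue with the fewest
 * accumulated tokens is served next, and the minimum is subtracted from
 * all four lanes to bound the token counters.
 */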
2544
2545
2546 #define grinder_evict(port, pos)
2547
2548 static inline void
2549 grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
2550 {
2551         struct rte_sched_grinder *grinder = port->grinder + pos;
2552
2553         rte_prefetch0(grinder->pipe);
2554         rte_prefetch0(grinder->queue[0]);
2555 }
2556
2557 static inline void
2558 grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
2559 {
2560         struct rte_sched_grinder *grinder = port->grinder + pos;
2561         uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2562
2563         qsize = grinder->qsize;
2564         grinder->qpos = 0;
2565
2566         if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2567                 qr[0] = grinder->queue[0]->qr & (qsize - 1);
2568
2569                 rte_prefetch0(grinder->qbase[0] + qr[0]);
2570                 return;
2571         }
2572
2573         qr[0] = grinder->queue[0]->qr & (qsize - 1);
2574         qr[1] = grinder->queue[1]->qr & (qsize - 1);
2575         qr[2] = grinder->queue[2]->qr & (qsize - 1);
2576         qr[3] = grinder->queue[3]->qr & (qsize - 1);
2577
2578         rte_prefetch0(grinder->qbase[0] + qr[0]);
2579         rte_prefetch0(grinder->qbase[1] + qr[1]);
2580
2581         grinder_wrr_load(port, pos);
2582         grinder_wrr(port, pos);
2583
2584         rte_prefetch0(grinder->qbase[2] + qr[2]);
2585         rte_prefetch0(grinder->qbase[3] + qr[3]);
2586 }
2587
2588 static inline void
2589 grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
2590 {
2591         struct rte_sched_grinder *grinder = port->grinder + pos;
2592         uint32_t qpos = grinder->qpos;
2593         struct rte_mbuf **qbase = grinder->qbase[qpos];
2594         uint16_t qsize = grinder->qsize;
2595         uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2596
2597         grinder->pkt = qbase[qr];
2598         rte_prefetch0(grinder->pkt);
2599
2600         if (unlikely((qr & 0x7) == 7)) {
2601                 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2602
2603                 rte_prefetch0(qbase + qr_next);
2604         }
2605 }
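
/*
 * With 8-byte mbuf pointers, eight consecutive queue slots share one
 * 64-byte cache line; when the read pointer reaches the last slot of a
 * line ((qr & 0x7) == 7), the next line of the queue array is
 * prefetched.
 */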
2606
2607 static inline uint32_t
2608 grinder_handle(struct rte_sched_port *port, uint32_t pos)
2609 {
2610         struct rte_sched_grinder *grinder = port->grinder + pos;
2611
2612         switch (grinder->state) {
2613         case e_GRINDER_PREFETCH_PIPE:
2614         {
2615                 if (grinder_next_pipe(port, pos)) {
2616                         grinder_prefetch_pipe(port, pos);
2617                         port->busy_grinders++;
2618
2619                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2620                         return 0;
2621                 }
2622
2623                 return 0;
2624         }
2625
2626         case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2627         {
2628                 struct rte_sched_pipe *pipe = grinder->pipe;
2629
2630                 grinder->pipe_params = port->pipe_profiles + pipe->profile;
2631                 grinder_prefetch_tc_queue_arrays(port, pos);
2632                 grinder_credits_update(port, pos);
2633
2634                 grinder->state = e_GRINDER_PREFETCH_MBUF;
2635                 return 0;
2636         }
2637
2638         case e_GRINDER_PREFETCH_MBUF:
2639         {
2640                 grinder_prefetch_mbuf(port, pos);
2641
2642                 grinder->state = e_GRINDER_READ_MBUF;
2643                 return 0;
2644         }
2645
2646         case e_GRINDER_READ_MBUF:
2647         {
2648                 uint32_t wrr_active, result = 0;
2649
2650                 result = grinder_schedule(port, pos);
2651
2652                 wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2653
2654                 /* Look for next packet within the same TC */
2655                 if (result && grinder->qmask) {
2656                         if (wrr_active)
2657                                 grinder_wrr(port, pos);
2658
2659                         grinder_prefetch_mbuf(port, pos);
2660
2661                         return 1;
2662                 }
2663
2664                 if (wrr_active)
2665                         grinder_wrr_store(port, pos);
2666
2667                 /* Look for another active TC within same pipe */
2668                 if (grinder_next_tc(port, pos)) {
2669                         grinder_prefetch_tc_queue_arrays(port, pos);
2670
2671                         grinder->state = e_GRINDER_PREFETCH_MBUF;
2672                         return result;
2673                 }
2674
2675                 if (grinder->productive == 0 &&
2676                     port->pipe_loop == RTE_SCHED_PIPE_INVALID)
2677                         port->pipe_loop = grinder->pindex;
2678
2679                 grinder_evict(port, pos);
2680
2681                 /* Look for another active pipe */
2682                 if (grinder_next_pipe(port, pos)) {
2683                         grinder_prefetch_pipe(port, pos);
2684
2685                         grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2686                         return result;
2687                 }
2688
2689                 /* No active pipe found */
2690                 port->busy_grinders--;
2691
2692                 grinder->state = e_GRINDER_PREFETCH_PIPE;
2693                 return result;
2694         }
2695
2696         default:
2697                 rte_panic("Algorithmic error (invalid state)\n");
2698                 return 0;
2699         }
2700 }
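
/*
 * Each grinder advances through the state machine one prefetch step per
 * invocation; rte_sched_port_dequeue() rotates over all grinders, so
 * the data prefetched in one state has time to arrive before it is
 * consumed in the grinder's next state.
 */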
2701
2702 static inline void
2703 rte_sched_port_time_resync(struct rte_sched_port *port)
2704 {
2705         uint64_t cycles = rte_get_tsc_cycles();
2706         uint64_t cycles_diff = cycles - port->time_cpu_cycles;
2707         uint64_t bytes_diff;
2708
2709         /* Compute elapsed time in bytes */
2710         bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2711                                            port->inv_cycles_per_byte);
2712
2713         /* Advance port time */
2714         port->time_cpu_cycles = cycles;
2715         port->time_cpu_bytes += bytes_diff;
2716         if (port->time < port->time_cpu_bytes)
2717                 port->time = port->time_cpu_bytes;
2718
2719         /* Reset pipe loop detection */
2720         port->pipe_loop = RTE_SCHED_PIPE_INVALID;
2721 }
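
/*
 * port->time is expressed in bytes. The precomputed reciprocal
 * (inv_cycles_per_byte) converts elapsed TSC cycles into bytes without
 * an integer division on the fast path, with the dividend pre-scaled by
 * RTE_SCHED_TIME_SHIFT to preserve precision at low rates.
 */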
2722
2723 static inline int
2724 rte_sched_port_exceptions(struct rte_sched_port *port, int second_pass)
2725 {
2726         int exceptions;
2727
2728         /* Check if any exception flag is set */
2729         exceptions = (second_pass && port->busy_grinders == 0) ||
2730                 (port->pipe_exhaustion == 1);
2731
2732         /* Clear exception flags */
2733         port->pipe_exhaustion = 0;
2734
2735         return exceptions;
2736 }
2737
2738 int
2739 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
2740 {
2741         uint32_t i, count;
2742
2743         port->pkts_out = pkts;
2744         port->n_pkts_out = 0;
2745
2746         rte_sched_port_time_resync(port);
2747
2748         /* Take each queue in the grinder one step further */
2749         for (i = 0, count = 0; ; i++)  {
2750                 count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
2751                 if ((count == n_pkts) ||
2752                     rte_sched_port_exceptions(port, i >= RTE_SCHED_PORT_N_GRINDERS)) {
2753                         break;
2754                 }
2755         }
2756
2757         return count;
2758 }
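
/*
 * Typical fast-path usage (a minimal sketch; the burst size of 32 is
 * illustrative), with packets classified via rte_sched_port_pkt_write()
 * beforehand:
 *
 *   n = rte_sched_port_enqueue(port, pkts, n);
 *   n = rte_sched_port_dequeue(port, pkts, 32);
 *
 * The dequeue loop above stops once n_pkts packets are returned or the
 * exception check fires (all grinders idle after every grinder has been
 * visited, or pipe exhaustion detected).
 */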