lib/sched/rte_sched.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2010-2014 Intel Corporation
   3  */
   4
   5 #ifndef __INCLUDE_RTE_SCHED_H__
   6 #define __INCLUDE_RTE_SCHED_H__
   7
   8 #ifdef __cplusplus
   9 extern "C" {
  10 #endif
  11
  12 /**
  13  * @file
  14  * RTE Hierarchical Scheduler
  15  *
  16  * The hierarchical scheduler prioritizes the transmission of packets
  17  * from different users and traffic classes according to the Service
  18  * Level Agreements (SLAs) defined for the current network node.
  19  *
  20  * The scheduler supports thousands of packet queues grouped under a
  21  * 5-level hierarchy:
  22  *     1. Port:
  23  *           - Typical usage: output Ethernet port;
  24  *           - Multiple ports are scheduled in round robin order with
  25  *          equal priority;
  26  *     2. Subport:
  27  *           - Typical usage: group of users;
  28  *           - Traffic shaping using the token bucket algorithm
  29  *          (one bucket per subport);
  30  *           - Upper limit enforced per traffic class at subport level;
  31  *           - Lower priority traffic classes able to reuse subport
  32  *          bandwidth currently unused by higher priority traffic
  33  *          classes of the same subport;
  34  *           - When any subport traffic class is oversubscribed
  35  *          (configuration time event), the usage of subport member
  36  *          pipes with high demand for that traffic class is
  37  *          truncated to a dynamically adjusted value with no
  38  *          impact on low demand pipes;
  39  *     3. Pipe:
  40  *           - Typical usage: individual user/subscriber;
  41  *           - Traffic shaping using the token bucket algorithm
  42  *          (one bucket per pipe);
  43  *     4. Traffic class:
  44  *           - Traffic classes of the same pipe handled in strict
  45  *          priority order;
  46  *           - Upper limit enforced per traffic class at the pipe level;
  47  *           - Lower priority traffic classes able to reuse pipe
  48  *          bandwidth currently unused by higher priority traffic
  49  *          classes of the same pipe;
  50  *     5. Queue:
  51  *           - Typical usage: queue hosting packets from one or
  52  *          multiple connections of same traffic class belonging to
  53  *          the same user;
  54  *           - Weighted Round Robin (WRR) is used to service the queues
  55  *          of the lowest priority (best-effort) traffic class of a pipe.
  56  *
  57  */
  58
  59 #include <sys/types.h>
  60 #include <rte_compat.h>
  61 #include <rte_mbuf.h>
  62 #include <rte_meter.h>
  63
  64 /** Congestion Management */
  65 #include "rte_red.h"
  66 #include "rte_pie.h"
  67
  68 /** Maximum number of queues per pipe.
  69  * Note that multiple queues (power of 2) can only be assigned to the
  70  * lowest priority (best-effort) traffic class. The other, higher priority
  71  * traffic classes can only have one queue each.
  72  * This value must not be changed.
  73  *
  74  * @see struct rte_sched_port_params
  75  */
  76 #define RTE_SCHED_QUEUES_PER_PIPE    16
  77
  78 /** Number of WRR queues for best-effort traffic class per pipe.
  79  * Sizes the wrr_weights array of struct rte_sched_pipe_params.
  80  * @see struct rte_sched_pipe_params
  81  */
  82 #define RTE_SCHED_BE_QUEUES_PER_PIPE    4
  83
  84 /** Number of traffic classes per pipe (as well as subport): one single-queue
  85  * traffic class per non best-effort queue, plus the best-effort traffic class.
  86  * @see struct rte_sched_subport_params, struct rte_sched_pipe_params
  87  */
  88 #define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
  89 (RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)
  90
  91 /** Best-effort traffic class ID (the last, i.e. lowest priority, traffic class).
  92  * Must not be changed.
  93  */
  94 #define RTE_SCHED_TRAFFIC_CLASS_BE    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
  95
  96 /**
  97  * Ethernet framing overhead. Overhead fields per Ethernet frame:
  98  * 1. Preamble:                             7 bytes;
  99  * 2. Start of Frame Delimiter (SFD):       1 byte;
 100  * 3. Frame Check Sequence (FCS):           4 bytes;
 101  * 4. Inter Frame Gap (IFG):               12 bytes (24 bytes in total).
 102  *
 103  * The FCS is considered overhead only if not included in the packet
 104  * length (field pkt_len of struct rte_mbuf). A different default can be
 105  * selected by defining this macro before this header is included.
 106  * @see struct rte_sched_port_params
 107  */
 108 #ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
 109 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 110 #endif
 111
 112 /**
 113  * Congestion Management (CMAN) mode
 114  *
 115  * This is used for controlling the admission of packets into a packet queue or
 116  * group of packet queues on congestion.
 117  *
 118  * The *Random Early Detection (RED)* algorithm works by proactively dropping
 119  * more and more input packets as the queue occupancy builds up. When the queue
 120  * is full or almost full, RED effectively works as *tail drop*. The *Weighted
 121  * RED* algorithm uses a separate set of RED thresholds for each packet color.
 122  *
 123  * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
 124  * drops a packet at the onset of the congestion and tries to control the
 125  * latency around the target value. The congestion detection, however, is based
 126  * on the queueing latency instead of the queue length like RED. For more
 127  * information, refer to RFC 8033.
 128  */
 129 enum rte_sched_cman_mode {
 130         RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
 131         RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
 132 };
 133
 134 /**
 135  * Pipe configuration parameters.
 136  *
 137  * The token bucket rate and the traffic class rates are measured in
 138  * bytes per second, the token bucket size is measured in credits and
 139  * the traffic class enforcement period is measured in milliseconds.
 140  *
 141  * One credit represents one byte.
 142  */
 143 struct rte_sched_pipe_params {
 144         /** Token bucket rate (measured in bytes per second) */
 145         uint64_t tb_rate;
 146
 147         /** Token bucket size (measured in credits) */
 148         uint64_t tb_size;
 149
 150         /** Traffic class rates (measured in bytes per second), one per traffic class */
 151         uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 152
 153         /** Enforcement period (measured in milliseconds) */
 154         uint64_t tc_period;
 155
 156         /** Best-effort traffic class oversubscription weight */
 157         uint8_t tc_ov_weight;
 158
 159         /** WRR weights of best-effort traffic class queues, one per queue */
 160         uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 161 };
 162
 163 /**
 164  * Congestion Management configuration parameters.
 165  */
 166 struct rte_sched_cman_params {
 167         /** Congestion Management mode */
 168         enum rte_sched_cman_mode cman_mode;
 169
 170         union {
 171                 /** RED parameters (one set per traffic class and packet color) */
 172                 struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 173
 174                 /** PIE parameters (one set per traffic class) */
 175                 struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 176         };
 177 };
 178
 179 /**
 180  * Subport configuration parameters.
 181  *
 182  * These describe the pipes and queues of a subport (number of enabled
 183  * pipes, queue size per traffic class, pipe profile table) and its
 184  * optional congestion management.
 185  *
 186  * @see struct rte_sched_subport_profile_params
 187  */
 188 struct rte_sched_subport_params {
 189         /** Number of subport pipes.
 190          * The subport can enable/allocate fewer pipes than the maximum
 191          * number set through struct rte_sched_port_params::n_pipes_per_subport,
 192          * as needed, to avoid memory allocation for the queues of the
 193          * pipes that are not really needed.
 194          */
 195         uint32_t n_pipes_per_subport_enabled;
 196
 197         /** Packet queue size for each traffic class.
 198          * All the pipes within the same subport share the same
 199          * configuration for the queues.
 200          */
 201         uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 202
 203         /** Pipe profile table.
 204          * Every pipe is configured using one of the profiles from this table.
 205          */
 206         struct rte_sched_pipe_params *pipe_profiles;
 207
 208         /** Number of profiles in the pipe profile table */
 209         uint32_t n_pipe_profiles;
 210
 211         /** Maximum number of profiles allowed in the pipe profile table */
 212         uint32_t n_max_pipe_profiles;
 213
 214         /** Congestion Management parameters
 215          * If NULL the congestion management is disabled for the subport,
 216          * otherwise proper parameters need to be provided.
 217          */
 218         struct rte_sched_cman_params *cman_params;
 219 };
 220 /** Subport bandwidth profile parameters (see rte_sched_port_subport_profile_add()). */
 221 struct rte_sched_subport_profile_params {
 222         /** Token bucket rate (measured in bytes per second) */
 223         uint64_t tb_rate;
 224
 225         /** Token bucket size (measured in credits) */
 226         uint64_t tb_size;
 227
 228         /** Traffic class rates (measured in bytes per second), one per traffic class */
 229         uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 230
 231         /** Enforcement period for rates (measured in milliseconds) */
 232         uint64_t tc_period;
 233 };
 234
 235 /** Subport statistics (see rte_sched_subport_read_stats()) */
 236 struct rte_sched_subport_stats {
 237         /** Number of packets successfully written for each traffic class */
 238         uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 239
 240         /** Number of packets dropped for each traffic class */
 241         uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 242
 243         /** Number of bytes successfully written for each traffic class */
 244         uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 245
 246         /** Number of bytes dropped for each traffic class */
 247         uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 248
 249         /** Number of packets dropped by congestion management scheme for each traffic class */
 250         uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 251 };
 252
 253 /** Queue statistics (see rte_sched_queue_read_stats()) */
 254 struct rte_sched_queue_stats {
 255         /** Number of packets successfully written */
 256         uint64_t n_pkts;
 257
 258         /** Number of packets dropped */
 259         uint64_t n_pkts_dropped;
 260
 261         /** Number of packets dropped by congestion management scheme */
 262         uint64_t n_pkts_cman_dropped;
 263
 264         /** Number of bytes successfully written */
 265         uint64_t n_bytes;
 266
 267         /** Number of bytes dropped */
 268         uint64_t n_bytes_dropped;
 269 };
 270
 271 /** Port configuration parameters. */
 272 struct rte_sched_port_params {
 273         /** Name of the port to be associated */
 274         const char *name;
 275
 276         /** CPU socket ID */
 277         int socket;
 278
 279         /** Output port rate (measured in bytes per second) */
 280         uint64_t rate;
 281
 282         /** Maximum Ethernet frame size (measured in bytes).
 283          * Should not include the framing overhead.
 284          */
 285         uint32_t mtu;
 286
 287         /** Framing overhead per packet (measured in bytes) */
 288         uint32_t frame_overhead;
 289
 290         /** Number of subports */
 291         uint32_t n_subports_per_port;
 292
 293         /** Subport profile table.
 294          * Every subport is configured using one of the profiles from this table.
 295          */
 296         struct rte_sched_subport_profile_params *subport_profiles;
 297
 298         /** Number of profiles in the subport profile table */
 299         uint32_t n_subport_profiles;
 300
 301         /** Maximum number of profiles allowed in the subport profile table */
 302         uint32_t n_max_subport_profiles;
 303
 304         /** Maximum number of subport pipes.
 305          * This parameter is used to reserve a fixed number of bits
 306          * in struct rte_mbuf::sched.queue_id for the pipe_id for all
 307          * the subports of the same port.
 308          */
 309         uint32_t n_pipes_per_subport;
 310 };
 311
 312 /*
 313  * Configuration
 314  *
 315  ***/
 316
 317 /**
 318  * Hierarchical scheduler port configuration
 319  *
 320  * @param params
 321  *   Port scheduler configuration parameter structure
 322  * @return
 323  *   Handle to port scheduler instance (the port argument of the other functions in this file) upon success or NULL otherwise.
 324  */
 325 struct rte_sched_port *
 326 rte_sched_port_config(struct rte_sched_port_params *params);
 327
 328 /**
 329  * Hierarchical scheduler port free
 330  *
 331  * @param port
 332  *   Handle to port scheduler instance, as returned by rte_sched_port_config()
 333  */
 334 void
 335 rte_sched_port_free(struct rte_sched_port *port);
 336
 337 /**
 338  * Hierarchical scheduler pipe profile add
 339  *
 340  * @param port
 341  *   Handle to port scheduler instance
 342  * @param subport_id
 343  *   Subport ID
 344  * @param params
 345  *   Pipe profile parameters
 346  * @param[out] pipe_profile_id
 347  *   Set to a valid profile ID when the profile is added successfully.
 348  * @return
 349  *   0 upon success, error code otherwise
 350  */
 351 int
 352 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
 353         uint32_t subport_id,
 354         struct rte_sched_pipe_params *params,
 355         uint32_t *pipe_profile_id);
 356
 357 /**
 358  * @warning
 359  * @b EXPERIMENTAL: this API may change without prior notice.
 360  *
 361  * Hierarchical scheduler subport bandwidth profile add
 362  * Note that this function is safe to use at runtime for adding a new
 363  * subport bandwidth profile as it doesn't have any impact on the
 364  * hierarchical structure of the scheduler.
 365  * @param port
 366  *   Handle to port scheduler instance
 367  * @param profile
 368  *   Subport bandwidth profile
 369  * @param subport_profile_id
 370  *   Subport profile id
 371  * @return
 372  *   0 upon success, error code otherwise
 373  */
 374 __rte_experimental
 375 int
 376 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
 377         struct rte_sched_subport_profile_params *profile,
 378         uint32_t *subport_profile_id);
 379
 380 /**
 381  * Hierarchical scheduler subport configuration
 382  * Note that this function is safe to use at runtime
 383  * to configure subport bandwidth profile.
 384  * @param port
 385  *   Handle to port scheduler instance
 386  * @param subport_id
 387  *   Subport ID
 388  * @param params
 389  *   Subport configuration parameters. Must be non-NULL
 390  *   for the first invocation (i.e. initialization) for a given
 391  *   subport. Ignored (recommended value is NULL) for all
 392  *   subsequent invocations on the same subport.
 393  * @param subport_profile_id
 394  *   ID of subport bandwidth profile
 395  * @return
 396  *   0 upon success, error code otherwise
 397  */
 398 int
 399 rte_sched_subport_config(struct rte_sched_port *port,
 400         uint32_t subport_id,
 401         struct rte_sched_subport_params *params,
 402         uint32_t subport_profile_id);
 403
 404 /**
 405  * Hierarchical scheduler pipe configuration
 406  *
 407  * @param port
 408  *   Handle to port scheduler instance
 409  * @param subport_id
 410  *   Subport ID
 411  * @param pipe_id
 412  *   Pipe ID within subport
 413  * @param pipe_profile
 414  *   ID of subport-level pre-configured pipe profile (see struct rte_sched_subport_params::pipe_profiles and rte_sched_subport_pipe_profile_add())
 415  * @return
 416  *   0 upon success, error code otherwise
 417  */
 418 int
 419 rte_sched_pipe_config(struct rte_sched_port *port,
 420         uint32_t subport_id,
 421         uint32_t pipe_id,
 422         int32_t pipe_profile);
 423
 424 /**
 425  * Hierarchical scheduler memory footprint size per port
 426  *
 427  * @param port_params
 428  *   Port scheduler configuration parameter structure
 429  * @param subport_params
 430  *   Array of pointers to subport parameter structures
 431  * @return
 432  *   Memory footprint size in bytes upon success, 0 otherwise
 433  */
 434 uint32_t
 435 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
 436         struct rte_sched_subport_params **subport_params);
 437 /*
 438  * Statistics
 439  *
 440  ***/
 441
 442 /**
 443  * Hierarchical scheduler subport statistics read
 444  *
 445  * @param port
 446  *   Handle to port scheduler instance
 447  * @param subport_id
 448  *   Subport ID
 449  * @param[out] stats
 450  *   Pointer to pre-allocated subport statistics structure where the statistics
 451  *   counters should be stored
 452  * @param[out] tc_ov
 453  *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 454  *   where the oversubscription status for each of the subport traffic classes
 455  *   should be stored.
 456  * @return
 457  *   0 upon success, error code otherwise
 458  */
 459 int
 460 rte_sched_subport_read_stats(struct rte_sched_port *port,
 461         uint32_t subport_id,
 462         struct rte_sched_subport_stats *stats,
 463         uint32_t *tc_ov);
 464
 465 /**
 466  * Hierarchical scheduler queue statistics read
 467  *
 468  * @param port
 469  *   Handle to port scheduler instance
 470  * @param queue_id
 471  *   Queue ID within port scheduler
 472  * @param[out] stats
 473  *   Pointer to pre-allocated queue statistics structure where the statistics
 474  *   counters should be stored
 475  * @param[out] qlen
 476  *   Pointer to pre-allocated variable where the current queue length
 477  *   should be stored.
 478  * @return
 479  *   0 upon success, error code otherwise
 480  */
 481 int
 482 rte_sched_queue_read_stats(struct rte_sched_port *port,
 483         uint32_t queue_id,
 484         struct rte_sched_queue_stats *stats,
 485         uint16_t *qlen);
 486
 487 /**
 488  * Scheduler hierarchy path write to packet descriptor. Typically
 489  * called by the packet classification stage.
 490  *
 491  * @param port
 492  *   Handle to port scheduler instance
 493  * @param pkt
 494  *   Packet descriptor handle
 495  * @param subport
 496  *   Subport ID
 497  * @param pipe
 498  *   Pipe ID within subport
 499  * @param traffic_class
 500  *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 501  * @param queue
 502  *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 503  *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 504  * @param color
 505  *   Packet color to set
 506  */
 507 void
 508 rte_sched_port_pkt_write(struct rte_sched_port *port,
 509                          struct rte_mbuf *pkt,
 510                          uint32_t subport, uint32_t pipe, uint32_t traffic_class,
 511                          uint32_t queue, enum rte_color color);
 512
 513 /**
 514  * Scheduler hierarchy path read from packet descriptor (struct
 515  * rte_mbuf). Typically called as part of the hierarchical scheduler
 516  * enqueue operation. The subport, pipe, traffic class and queue
 517  * parameters need to be pre-allocated by the caller.
 518  *
 519  * @param port
 520  *   Handle to port scheduler instance
 521  * @param pkt
 522  *   Packet descriptor handle
 523  * @param[out] subport
 524  *   Subport ID
 525  * @param[out] pipe
 526  *   Pipe ID within subport
 527  * @param[out] traffic_class
 528  *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 529  * @param[out] queue
 530  *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 531  *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 532  */
 533 void
 534 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
 535                                   const struct rte_mbuf *pkt,
 536                                   uint32_t *subport, uint32_t *pipe,
 537                                   uint32_t *traffic_class, uint32_t *queue);
 538 /** Packet color read from the packet descriptor (struct rte_mbuf); presumably the color stored by rte_sched_port_pkt_write() - TODO confirm. */
 539 enum rte_color
 540 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);
 541
 542 /**
 543  * Hierarchical scheduler port enqueue. Writes up to n_pkts packets to
 544  * the port scheduler and returns the number of packets actually written.
 545  * For each packet, the port scheduler queue to write the packet to is
 546  * identified by reading the hierarchy path from the packet
 547  * descriptor; if the queue is full or congested and the packet is not
 548  * written to the queue, then the packet is automatically dropped
 549  * without any action required from the caller.
 550  *
 551  * @param port
 552  *   Handle to port scheduler instance
 553  * @param pkts
 554  *   Array storing the packet descriptor handles
 555  * @param n_pkts
 556  *   Number of packets to enqueue from the pkts array into the port scheduler
 557  * @return
 558  *   Number of packets successfully enqueued
 559  */
 560 int
 561 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
 562
 563 /**
 564  * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
 565  * port scheduler and stores them in the pkts array and returns the
 566  * number of packets actually read.  The pkts array needs to be
 567  * pre-allocated by the caller with at least n_pkts entries.
 568  *
 569  * @param port
 570  *   Handle to port scheduler instance
 571  * @param[out] pkts
 572  *   Pre-allocated packet descriptor array where the packets dequeued
 573  *   from the port scheduler should be stored; it needs at least
 574  *   n_pkts entries
 575  * @param n_pkts
 576  *   Number of packets to dequeue from the port scheduler
 577  * @return
 578  *   Number of packets successfully dequeued and placed in the pkts array
 579  */
 580 int
 581 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
 582
 583 #ifdef __cplusplus
 584 }
 585 #endif
 586
 587 #endif /* __INCLUDE_RTE_SCHED_H__ */