4 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #ifndef __RTE_RED_H_INCLUDED__
36 #define __RTE_RED_H_INCLUDED__
44 * RTE Random Early Detection (RED)
51 #include <rte_common.h>
52 #include <rte_debug.h>
53 #include <rte_cycles.h>
54 #include <rte_branch_prediction.h>
56 #define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */
57 #define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */
58 #define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */
59 #define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */
60 #define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */
61 #define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */
62 #define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */
63 #define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */
64 #define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT)
65 #define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
69 #define RTE_RED_ASSERT(exp) \
71 rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
76 #define RTE_RED_ASSERT(exp) do { } while(0)
78 #endif /* RTE_RED_DEBUG */
84 extern uint32_t rte_red_rand_val;
85 extern uint32_t rte_red_rand_seed;
86 extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
87 extern uint16_t rte_red_pow2_frac_inv[16];
90 * RED configuration parameters passed by user
93 struct rte_red_params {
94 uint16_t min_th; /**< Minimum threshold for queue (max_th) */
95 uint16_t max_th; /**< Maximum threshold for queue (max_th) */
96 uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
97 uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
101 * RED configuration parameters
103 struct rte_red_config {
104 uint32_t min_th; /**< min_th scaled in fixed-point format */
105 uint32_t max_th; /**< max_th scaled in fixed-point format */
106 uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
107 uint8_t maxp_inv; /**< maxp_inv */
108 uint8_t wq_log2; /**< wq_log2 */
115 uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */
116 uint32_t count; /**< Number of packets since last marked packet (count) */
117 uint64_t q_time; /**< Start of the queue idle time (q_time) */
121 * @brief Initialises run-time data
123 * @param [in,out] data pointer to RED runtime data
125 * @return Operation status
130 rte_red_rt_data_init(struct rte_red *red);
133 * @brief Configures a single RED configuration parameter structure.
135 * @param [in,out] config pointer to a RED configuration parameter structure
136 * @param [in] wq_log2 log2 of the filter weight, valid range is:
137 * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
138 * @param [in] min_th queue minimum threshold in number of packets
139 * @param [in] max_th queue maximum threshold in number of packets
140 * @param [in] maxp_inv inverse maximum mark probability
142 * @return Operation status
147 rte_red_config_init(struct rte_red_config *red_cfg,
148 const uint16_t wq_log2,
149 const uint16_t min_th,
150 const uint16_t max_th,
151 const uint16_t maxp_inv);
154 * @brief Generate random number for RED
156 * Implemenetation based on:
157 * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
159 * 10 bit shift has been found through empirical tests (was 16).
161 * @return Random number between 0 and (2^22 - 1)
163 static inline uint32_t
166 rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
167 return (rte_red_rand_seed >> 10);
171 * @brief calculate factor to scale average queue size when queue
174 * @param [in] wq_log2, where EWMA filter weight wq = 1/(2 ^ wq_log2)
175 * @param [in] m exponent in the computed value (1 - wq) ^ m
177 * @return computed value
178 * @retval ((1 - wq) ^ m) scaled in fixed-point format
180 static inline uint16_t
181 __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
187 * Basic math tells us that:
188 * a^b = 2^(b * log2(a) )
195 * So we are computing this equation:
196 * factor = 2 ^ ( m * log2(1-Wq))
198 * First we are computing:
201 * To avoid dealing with signed numbers log2 values are positive
202 * but they should be negative because (1-Wq) is always < 1.
203 * Contents of log2 table values are also scaled for precision.
206 n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
209 * The tricky part is computing 2^n, for this I split n into
210 * integer part and fraction part.
211 * f - is fraction part of n
212 * n - is integer part of original n
214 * Now using basic math we compute 2^n:
215 * 2^(f+n) = 2^f * 2^n
216 * 2^f - we use lookup table
217 * 2^n - can be replaced with bit shift right oeprations
223 if (n < RTE_RED_SCALING)
224 return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
230 * @brief Updates queue average in condition when queue is empty
232 * Note: packet is never dropped in this particular case.
234 * @param [in] config pointer to a RED configuration parameter structure
235 * @param [in,out] data pointer to RED runtime data
236 * @param [in] time current time stamp
238 * @return Operation status
239 * @retval 0 enqueue the packet
240 * @retval 1 drop the packet based on max threshold criterion
241 * @retval 2 drop the packet based on mark probability criterion
244 rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
248 uint64_t time_diff = 0, m = 0;
250 RTE_RED_ASSERT(red_cfg != NULL);
251 RTE_RED_ASSERT(red != NULL);
256 * We compute avg but we don't compare avg against
257 * min_th or max_th, nor calculate drop probability
259 time_diff = time - red->q_time;
262 * m is the number of packets that might have arrived while the queue was empty.
263 * In this case we have time stamps provided by scheduler in byte units (bytes
264 * transmitted on network port). Such time stamp translates into time units as
265 * port speed is fixed but such approach simplifies the code.
267 m = time_diff / RTE_RED_S;
270 * Check that m will fit into 16-bit unsigned integer
272 if (m >= RTE_RED_2POW16) {
275 red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
282 * Drop probability (Sally Floyd and Van Jacobson):
284 * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
285 * pa = pb / (2 - count * pb)
288 * (1 / maxp_inv) * (avg - min_th)
289 * ---------------------------------
291 * pa = -----------------------------------------------
292 * count * (1 / maxp_inv) * (avg - min_th)
293 * 2 - -----------------------------------------
298 * pa = -----------------------------------------------------------
299 * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
302 * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then:
306 * pa = -----------------------------------
307 * pa_const - count * (avg - min_th)
311 * @brief make a decision to drop or enqueue a packet based on mark probability
314 * @param [in] config pointer to structure defining RED parameters
315 * @param [in,out] data pointer to RED runtime data
317 * @return operation status
318 * @retval 0 enqueue the packet
319 * @retval 1 drop the packet
322 __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
324 uint32_t pa_num = 0; /* numerator of drop-probability */
325 uint32_t pa_den = 0; /* denominator of drop-probability */
326 uint32_t pa_num_count = 0;
328 pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
330 pa_num_count = red->count * pa_num;
332 if (red_cfg->pa_const <= pa_num_count)
335 pa_den = red_cfg->pa_const - pa_num_count;
337 /* If drop, generate and save random number to be used next time */
338 if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
339 rte_red_rand_val = rte_fast_rand();
349 * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
351 * @param [in] config pointer to a RED configuration parameter structure
352 * @param [in,out] data pointer to RED runtime data
353 * @param [in] q current queue size (measured in packets)
355 * @return Operation status
356 * @retval 0 enqueue the packet
357 * @retval 1 drop the packet based on max threshold criterion
358 * @retval 2 drop the packet based on mark probability criterion
361 rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
365 RTE_RED_ASSERT(red_cfg != NULL);
366 RTE_RED_ASSERT(red != NULL);
369 * EWMA filter (Sally Floyd and Van Jacobson):
370 * avg = (1 - wq) * avg + wq * q
371 * avg = avg + q * wq - avg * wq
373 * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
374 * avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
376 * By using shift left/right operations, we get:
377 * avg_s = avg_s + (q << N) - (avg_s >> n)
378 * avg_s += (q << N) - (avg_s >> n)
382 red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
384 /* avg < min_th: do not mark the packet */
385 if (red->avg < red_cfg->min_th) {
390 /* min_th <= avg < max_th: mark the packet with pa probability */
391 if (red->avg < red_cfg->max_th) {
392 if (!__rte_red_drop(red_cfg, red)) {
401 /* max_th <= avg: always mark the packet */
407 * @brief Decides if new packet should be enqeued or dropped
408 * Updates run time data based on new queue size value.
409 * Based on new queue average and RED configuration parameters
410 * gives verdict whether to enqueue or drop the packet.
412 * @param [in] config pointer to a RED configuration parameter structure
413 * @param [in,out] data pointer to RED runtime data
414 * @param [in] q updated queue size in packets
415 * @param [in] time current time stamp
417 * @return Operation status
418 * @retval 0 enqueue the packet
419 * @retval 1 drop the packet based on max threshold criteria
420 * @retval 2 drop the packet based on mark probability criteria
423 rte_red_enqueue(const struct rte_red_config *red_cfg,
428 RTE_RED_ASSERT(red_cfg != NULL);
429 RTE_RED_ASSERT(red != NULL);
432 return rte_red_enqueue_nonempty(red_cfg, red, q);
434 return rte_red_enqueue_empty(red_cfg, red, time);
439 * @brief Callback to records time that queue became empty
441 * @param [in,out] data pointer to RED runtime data
442 * @param [in] time current time stamp
445 rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
454 #endif /* __RTE_RED_H_INCLUDED__ */